2025-06-26 14:40:42 -04:00
use std::borrow::Cow;
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
As stated in `codex-rs/README.md`:
Today, Codex CLI is written in TypeScript and requires Node.js 22+ to
run it. For a number of users, this runtime requirement inhibits
adoption: they would be better served by a standalone executable. As
maintainers, we want Codex to run efficiently in a wide range of
environments with minimal overhead. We also want to take advantage of
operating system-specific APIs to provide better sandboxing, where
possible.
To that end, we are moving forward with a Rust implementation of Codex
CLI contained in this folder, which has the following benefits:
- The CLI compiles to small, standalone, platform-specific binaries.
- Can make direct, native calls to
[seccomp](https://man7.org/linux/man-pages/man2/seccomp.2.html) and
[landlock](https://man7.org/linux/man-pages/man7/landlock.7.html) in
order to support sandboxing on Linux.
- No runtime garbage collection, resulting in lower memory consumption
and better, more predictable performance.
Currently, the Rust implementation is materially behind the TypeScript
implementation in functionality, so continue to use the TypeScript
implementation for the time being. We will publish native executables via
GitHub Releases as soon as we feel the Rust version is usable.
2025-04-24 13:31:40 -07:00
use std::collections::HashMap;
OpenTelemetry events (#2103)
Codex can emit [OpenTelemetry](https://opentelemetry.io/) **log events** that describe each run: outbound API requests, streamed responses, user input, tool-approval decisions, and the result of every tool invocation. Export is **disabled by default** so local runs remain self-contained. Opt in by adding an `[otel]` table and choosing an exporter.
```toml
[otel]
environment = "staging" # defaults to "dev"
exporter = "none" # defaults to "none"; set to otlp-http or otlp-grpc to send events
log_user_prompt = false # defaults to false; redact prompt text unless explicitly enabled
```
Codex tags every exported event with `service.name = "codex-cli"`, the CLI version, and an `env` attribute so downstream collectors can distinguish dev/staging/prod traffic. Only telemetry produced inside the `codex_otel` crate—the events listed below—is forwarded to the exporter.
### Event catalog
Every event shares a common set of metadata fields: `event.timestamp`, `conversation.id`, `app.version`, `auth_mode` (when available), `user.account_id` (when available), `terminal.type`, `model`, and `slug`.
With OTEL enabled, Codex emits the following event types (in addition to the metadata above); a hypothetical Rust model of the catalog follows the list:
- `codex.api_request`
  - `cf_ray` (optional)
  - `attempt`
  - `duration_ms`
  - `http.response.status_code` (optional)
  - `error.message` (failures)
- `codex.sse_event`
  - `event.kind`
  - `duration_ms`
  - `error.message` (failures)
  - `input_token_count` (completion only)
  - `output_token_count` (completion only)
  - `cached_token_count` (completion only, optional)
  - `reasoning_token_count` (completion only, optional)
  - `tool_token_count` (completion only)
- `codex.user_prompt`
  - `prompt_length`
  - `prompt` (redacted unless `log_user_prompt = true`)
- `codex.tool_decision`
  - `tool_name`
  - `call_id`
  - `decision` (`approved`, `approved_for_session`, `denied`, or `abort`)
  - `source` (`config` or `user`)
- `codex.tool_result`
  - `tool_name`
  - `call_id`
  - `arguments`
  - `duration_ms` (execution time for the tool)
  - `success` (`"true"` or `"false"`)
  - `output`
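To make the catalog concrete, here is a hypothetical Rust model of these events. The actual `codex_otel` types are not shown in this description, so every type name and field type below is an assumption:

```rust
/// Hypothetical mirror of the event catalog above; field names follow the
/// attributes in the list, but the real `codex_otel` definitions may differ.
pub enum OtelEvent {
    ApiRequest {
        attempt: u32,
        duration_ms: u64,
        http_status: Option<u16>, // `http.response.status_code`
        cf_ray: Option<String>,
        error_message: Option<String>, // populated on failures
    },
    SseEvent {
        kind: String, // `event.kind`
        duration_ms: u64,
        error_message: Option<String>,
        // Token counts are only present on completion events.
        input_token_count: Option<u64>,
        output_token_count: Option<u64>,
        cached_token_count: Option<u64>,
        reasoning_token_count: Option<u64>,
        tool_token_count: Option<u64>,
    },
    UserPrompt {
        prompt_length: usize,
        prompt: Option<String>, // None unless `log_user_prompt = true`
    },
    ToolDecision {
        tool_name: String,
        call_id: String,
        decision: String, // "approved" | "approved_for_session" | "denied" | "abort"
        source: String,   // "config" | "user"
    },
    ToolResult {
        tool_name: String,
        call_id: String,
        arguments: String,
        duration_ms: u64,
        success: bool, // exported as "true" / "false"
        output: String,
    },
}
```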
### Choosing an exporter
Set `otel.exporter` to control where events go:
- `none` – leaves instrumentation active but skips exporting. This is the default.
- `otlp-http` – posts OTLP log records to an OTLP/HTTP collector. Specify the endpoint, protocol, and headers your collector expects:
```toml
[otel]
exporter = { otlp-http = { endpoint = "https://otel.example.com/v1/logs", protocol = "binary", headers = { "x-otlp-api-key" = "${OTLP_TOKEN}" } } }
```
- `otlp-grpc` – streams OTLP log records over gRPC. Provide the endpoint and any metadata headers:
```toml
[otel]
exporter = { otlp-grpc = { endpoint = "https://otel.example.com:4317", headers = { "x-otlp-meta" = "abc123" } } }
```
If the exporter is `none`, nothing is written anywhere; otherwise you must run or point to your own collector. All exporters run on a background batch worker that is flushed on shutdown.
If you build Codex from source, the OTEL crate is still behind an `otel` feature flag; the official prebuilt binaries ship with the feature enabled. When the feature is disabled, the telemetry hooks become no-ops so the CLI continues to function without the extra dependencies.
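As a rough illustration of that pattern (the hook name `record_event` is invented for this sketch, not the real API), feature-gating a no-op looks like this:

```rust
// With the `otel` feature enabled, the hook forwards the event onward; with
// the feature disabled, the same call compiles to an empty body, so call
// sites need no #[cfg] logic of their own.
#[cfg(feature = "otel")]
pub fn record_event(name: &str) {
    // A real build would hand the event to the background batch exporter here.
    println!("exporting event: {name}");
}

#[cfg(not(feature = "otel"))]
pub fn record_event(_name: &str) {
    // Telemetry disabled at compile time: intentionally a no-op.
}
```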
---------
Co-authored-by: Anton Panasenko <apanasenko@openai.com>
2025-09-29 19:30:55 +01:00
use std::fmt::Debug;
2025-09-18 14:37:06 -07:00
use std::path::Path;
use std::path::PathBuf;
use std::sync::Arc;
2025-05-07 16:33:28 -07:00
use std::sync::atomic::AtomicU64;
2025-05-07 12:56:38 -07:00
use std::time::Duration;
2025-09-02 18:36:19 -07:00
use crate::AuthManager;
Review Mode (Core) (#3401)
## 📝 Review Mode -- Core
This PR introduces the Core implementation for Review mode:
- New op `Op::Review { prompt: String }`: spawns a child review task with isolated context, a review-specific system prompt, and a `Config.review_model`.
- `EnteredReviewMode`: emitted when the child review session starts.
Every event from this point onwards reflects the review session.
- `ExitedReviewMode(Option<ReviewOutputEvent>)`: emitted when the review
finishes or is interrupted, with optional structured findings:
```json
{
"findings": [
{
"title": "<≤ 80 chars, imperative>",
"body": "<valid Markdown explaining *why* this is a problem; cite files/lines/functions>",
"confidence_score": <float 0.0-1.0>,
"priority": <int 0-3>,
"code_location": {
"absolute_file_path": "<file path>",
"line_range": {"start": <int>, "end": <int>}
}
}
],
"overall_correctness": "patch is correct" | "patch is incorrect",
"overall_explanation": "<1-3 sentence explanation justifying the overall_correctness verdict>",
"overall_confidence_score": <float 0.0-1.0>
}
```
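For illustration, the JSON above maps naturally onto serde structs along these lines; `ReviewOutputEvent` is named in this PR, but the exact layout sketched here is an assumption:

```rust
use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct ReviewOutputEvent {
    findings: Vec<ReviewFinding>,
    overall_correctness: String, // "patch is correct" | "patch is incorrect"
    overall_explanation: String,
    overall_confidence_score: f32, // 0.0-1.0
}

#[derive(Debug, Deserialize)]
struct ReviewFinding {
    title: String,         // <= 80 chars, imperative
    body: String,          // Markdown explaining *why* this is a problem
    confidence_score: f32, // 0.0-1.0
    priority: u8,          // 0-3
    code_location: CodeLocation,
}

#[derive(Debug, Deserialize)]
struct CodeLocation {
    absolute_file_path: String,
    line_range: LineRange,
}

#[derive(Debug, Deserialize)]
struct LineRange {
    start: u32,
    end: u32,
}
```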
## Questions
### Why separate out its own message history?
We want the review thread to match the training of our review models as
much as possible -- that means using a custom prompt, removing user
instructions, and starting a clean chat history.
We also want to make sure the review thread doesn't leak into the parent
thread.
### Why do this as a mode, vs. sub-agents?
1. We want review to be a synchronous task, so it's fine for now to do a
bespoke implementation.
2. We're still unclear about the final structure for sub-agents. We'd
prefer to land this quickly and then refactor into sub-agents without
rushing that implementation.
2025-09-12 16:25:10 -07:00
use crate::client_common::REVIEW_PROMPT;
2025-09-03 21:47:00 -07:00
use crate::event_mapping::map_response_item_to_event_messages;
2025-09-24 10:27:35 -07:00
use crate::function_tool::FunctionCallError;
2025-09-16 18:43:32 -07:00
use crate::review_format::format_review_findings_block;
use crate::terminal;
2025-09-23 07:25:46 -07:00
use crate::user_notification::UserNotifier;
use async_channel::Receiver;
use async_channel::Sender;
fix: ensure apply_patch resolves relative paths against workdir or project cwd (#810)
https://github.com/openai/codex/pull/800 kicked off some work to be more
disciplined about honoring the `cwd` param passed in rather than
assuming `std::env::current_dir()` as the `cwd`. As part of this, we
need to ensure `apply_patch` calls honor the appropriate `cwd` as well,
which is significant if the paths in the `apply_patch` arg are not
absolute paths themselves. When they are not absolute, resolution works as follows:
- The `apply_patch` function call can contain an optional `workdir` param, so:
- If specified and is an absolute path, it should be used to resolve
relative paths
- If specified and is a relative path, should be resolved against
`Config.cwd` and then any relative paths will be resolved against the
result
- If `workdir` is not specified on the function call, relative paths
should be resolved against `Config.cwd`
Note that we had a similar issue in the TypeScript CLI that was fixed in
https://github.com/openai/codex/pull/556.
As part of the fix, this PR introduces `ApplyPatchAction` so clients can
deal with that instead of the raw `HashMap<PathBuf,
ApplyPatchFileChange>`. This enables us to enforce, by construction,
that all paths contained in the `ApplyPatchAction` are absolute paths.
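Put as code, the resolution rules above amount to something like the following sketch (the helper name is hypothetical):

```rust
use std::path::{Path, PathBuf};

/// Resolve a (possibly relative) path from an apply_patch hunk against the
/// optional `workdir` param and the session's `Config.cwd`.
fn resolve_patch_path(path: &Path, workdir: Option<&Path>, config_cwd: &Path) -> PathBuf {
    if path.is_absolute() {
        return path.to_path_buf();
    }
    match workdir {
        // Absolute workdir: resolve relative paths directly against it.
        Some(wd) if wd.is_absolute() => wd.join(path),
        // Relative workdir: resolve it against Config.cwd first.
        Some(wd) => config_cwd.join(wd).join(path),
        // No workdir: fall back to Config.cwd.
        None => config_cwd.join(path),
    }
}
```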
2025-05-04 12:32:51 -07:00
use codex_apply_patch::ApplyPatchAction;
use codex_apply_patch::MaybeApplyPatchVerified;
2025-05-07 08:37:48 -07:00
use codex_apply_patch::maybe_parse_apply_patch_verified;
2025-09-07 20:22:25 -07:00
use codex_protocol::mcp_protocol::ConversationId;
2025-09-10 17:42:54 -07:00
use codex_protocol::protocol::ConversationPathResponseEvent;
2025-09-14 17:34:33 -07:00
use codex_protocol::protocol::ExitedReviewModeEvent;
use codex_protocol::protocol::ReviewRequest;
2025-09-10 10:17:24 -07:00
use codex_protocol::protocol::RolloutItem;
2025-08-27 00:04:21 -07:00
use codex_protocol::protocol::TaskStartedEvent;
2025-08-17 21:40:31 -07:00
use codex_protocol::protocol::TurnAbortReason;
2025-09-11 11:08:51 -07:00
use codex_protocol::protocol::TurnContextItem;
use futures::prelude::*;
fix: chat completions API now also passes tools along (#1167)
Prior to this PR, there were two big misses in `chat_completions.rs`:
1. The loop in `stream_chat_completions()` was only including items of
type `ResponseItem::Message` when building up the `"messages"` JSON for
the `POST` request to the `chat/completions` endpoint. This fixes things
by ensuring other variants (`FunctionCall`, `LocalShellCall`, and
`FunctionCallOutput`) are included, as well.
2. In `process_chat_sse()`, we were not recording tool calls and were
only emitting items of type
`ResponseEvent::OutputItemDone(ResponseItem::Message)` to the stream.
Now we introduce `FunctionCallState`, which is used to accumulate the
`delta`s of type `tool_calls`, so we can ultimately emit a
`ResponseItem::FunctionCall`, when appropriate.
While function calling now appears to work for chat completions with my
local testing, I believe that there are still edge cases that are not
covered and that this codepath would benefit from a battery of
integration tests. (As part of that further cleanup, we should also work
to support streaming responses in the UI.)
The other important part of this PR is some cleanup in
`core/src/codex.rs`. In particular, it was hard to reason about how
`run_task()` was building up the list of messages to include in a
request across the various cases:
- Responses API
- Chat Completions API
- Responses API used in concert with ZDR
I like to think things are a bit cleaner now where:
- `zdr_transcript` (if present) contains all messages in the history of
the conversation, which includes function call outputs that have not
been sent back to the model yet
- `pending_input` includes any messages the user has submitted while the
turn is in flight that need to be injected as part of the next `POST` to
the model
- `input_for_next_turn` includes the tool call outputs that have not
been sent back to the model yet
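A minimal sketch of the delta-accumulation idea (the real `FunctionCallState` in `chat_completions.rs` may be shaped differently):

```rust
/// Accumulates streamed `tool_calls` deltas until the SSE stream completes,
/// at which point the state can be emitted as a ResponseItem::FunctionCall.
#[derive(Default)]
struct FunctionCallState {
    name: Option<String>,
    call_id: Option<String>,
    arguments: String, // JSON arguments arrive as text fragments
}

impl FunctionCallState {
    fn apply_delta(&mut self, name: Option<&str>, call_id: Option<&str>, args_fragment: &str) {
        // The name and call id typically appear only on the first delta.
        if let Some(n) = name {
            self.name.get_or_insert_with(|| n.to_string());
        }
        if let Some(id) = call_id {
            self.call_id.get_or_insert_with(|| id.to_string());
        }
        self.arguments.push_str(args_fragment);
    }
}
```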
2025-06-02 13:47:51 -07:00
use mcp_types::CallToolResult;
Unified execution (#3288)
## Unified PTY-Based Exec Tool
Note: this requires the following flag in the config:
`use_experimental_unified_exec_tool=true`
- Adds a PTY-backed interactive exec feature (“unified_exec”) with session reuse via `session_id`, bounded output (128 KiB), and timeout clamping (≤ 60 s).
- Protocol: introduces `ResponseItem::UnifiedExec { session_id, arguments, timeout_ms }`.
- Tools: exposes `unified_exec` as a function tool (Responses API); excluded from the Chat Completions payload while still supported in tool lists.
- Path handling: resolves commands via `PATH` (or explicit paths), with UTF-8/newline-aware truncation (`truncate_middle`).
- Tests: cover command parsing, path resolution, session persistence/cleanup, multi-session isolation, timeouts, and truncation behavior.
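The bounds described above can be pictured like this (constant and function names are assumptions, not the actual implementation):

```rust
use std::time::Duration;

const MAX_OUTPUT_BYTES: usize = 128 * 1024; // bounded output: 128 KiB
const MAX_TIMEOUT: Duration = Duration::from_secs(60); // timeout clamp: <= 60 s

fn clamp_timeout(requested_ms: Option<u64>) -> Duration {
    requested_ms
        .map(Duration::from_millis)
        .map(|d| d.min(MAX_TIMEOUT)) // never wait longer than the cap
        .unwrap_or(MAX_TIMEOUT)
}

fn truncate_output(bytes: &[u8]) -> &[u8] {
    // The real truncate_middle is UTF-8/newline-aware; this sketch only caps length.
    &bytes[..bytes.len().min(MAX_OUTPUT_BYTES)]
}
```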
2025-09-10 17:38:11 -07:00
use serde::Deserialize;
use serde::Serialize;
feat: configurable notifications in the Rust CLI (#793)
With this change, you can specify a program that will be executed to get
notified about events generated by Codex. The notification info will be
packaged as a JSON object. The supported notification types are defined
by the `UserNotification` enum introduced in this PR. Initially, it
contains only one variant, `AgentTurnComplete`:
```rust
pub(crate) enum UserNotification {
    #[serde(rename_all = "kebab-case")]
    AgentTurnComplete {
        turn_id: String,
        /// Messages that the user sent to the agent to initiate the turn.
        input_messages: Vec<String>,
        /// The last message sent by the assistant in the turn.
        last_assistant_message: Option<String>,
    },
}
```
This is intended to support the common case when a "turn" ends, which
often means it is now your chance to give Codex further instructions.
For example, I have the following in my `~/.codex/config.toml`:
```toml
notify = ["python3", "/Users/mbolin/.codex/notify.py"]
```
I created my own custom notifier script that calls out to
[terminal-notifier](https://github.com/julienXX/terminal-notifier) to
show a desktop push notification on macOS. Contents of `notify.py`:
```python
#!/usr/bin/env python3

import json
import subprocess
import sys


def main() -> int:
    if len(sys.argv) != 2:
        print("Usage: notify.py <NOTIFICATION_JSON>")
        return 1

    try:
        notification = json.loads(sys.argv[1])
    except json.JSONDecodeError:
        return 1

    match notification_type := notification.get("type"):
        case "agent-turn-complete":
            assistant_message = notification.get("last-assistant-message")
            if assistant_message:
                title = f"Codex: {assistant_message}"
            else:
                title = "Codex: Turn Complete!"
            input_messages = notification.get("input_messages", [])
            message = " ".join(input_messages)
            title += message
        case _:
            print(f"not sending a push notification for: {notification_type}")
            return 0

    subprocess.check_output(
        [
            "terminal-notifier",
            "-title",
            title,
            "-message",
            message,
            "-group",
            "codex",
            "-ignoreDnD",
            "-activate",
            "com.googlecode.iterm2",
        ]
    )

    return 0


if __name__ == "__main__":
    sys.exit(main())
```
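On the producer side, dispatching boils down to serializing the notification and handing the JSON to the configured program as its final argument; a rough sketch, not the actual dispatch code:

```rust
use std::process::Command;

/// `notify_command` is the array from config.toml, e.g.
/// ["python3", "/Users/mbolin/.codex/notify.py"].
fn dispatch_notification(notify_command: &[String], json_payload: &str) {
    if let Some((program, args)) = notify_command.split_first() {
        // Fire-and-forget: a failing notifier should never break the session.
        let _ = Command::new(program).args(args).arg(json_payload).spawn();
    }
}
```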
For reference, here are related PRs that tried to add this functionality
to the TypeScript version of the Codex CLI:
* https://github.com/openai/codex/pull/160
* https://github.com/openai/codex/pull/498
2025-05-02 19:48:13 -07:00
use serde_json;
2025-09-23 13:59:16 -07:00
use serde_json::Value;
2025-09-18 18:21:52 +01:00
use tokio::sync::Mutex;
2025-05-07 08:37:48 -07:00
use tokio::sync::oneshot;
use tracing::debug;
2025-05-04 10:57:12 -07:00
use tracing::error;
use tracing::info;
use tracing::trace;
use tracing::warn;
exploration: create Session as part of Codex::spawn() (#2291)
Historically, `Codex::spawn()` would create the instance of `Codex` and
enforce, by construction, that `Op::ConfigureSession` was the first `Op`
submitted via `submit()`. Then over in `submission_loop()`, it would
handle the case for taking the parameters of `Op::ConfigureSession` and
turning it into a `Session`.
This approach has two challenges from a state management perspective:
https://github.com/openai/codex/blob/f968a1327ad39a7786759ea8f1d1c088fe41e91b/codex-rs/core/src/codex.rs#L718
- The local `sess` variable in `submission_loop()` has to be `mut` and
`Option<Arc<Session>>` because it is not invariant that a `Session` is
present for the lifetime of the loop, so there is a lot of logic to deal
with the case where `sess` is `None` (e.g., the `send_no_session_event`
function and all of its callsites).
- `submission_loop()` is written in such a way that
`Op::ConfigureSession` could be observed multiple times, but in
practice, it is only observed exactly once at the start of the loop.
In this PR, we try to simplify the state management by _removing_ the
`Op::ConfigureSession` enum variant and constructing the `Session` as
part of `Codex::spawn()` so that it can be passed to `submission_loop()`
as `Arc<Session>`. The original logic from the `Op::ConfigureSession`
has largely been moved to the new `Session::new()` constructor.
---
Incidentally, I also noticed that the handling of `Op::ConfigureSession`
can result in events being dispatched in addition to
`EventMsg::SessionConfigured`, as an `EventMsg::Error` is created for
every MCP initialization error, so it is important to preserve that
behavior:
https://github.com/openai/codex/blob/f968a1327ad39a7786759ea8f1d1c088fe41e91b/codex-rs/core/src/codex.rs#L901-L916
Though admittedly, I believe this does not play nice with #2264, as
these error messages will likely be dispatched before the client has a
chance to call `addConversationListener`, so we likely need to make it
so `newConversation` automatically creates the subscription, but we must
also guarantee that the "ack" from `newConversation` is returned before
any other conversation-related notifications are sent so the client
knows what `conversation_id` to match on.
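The state-management difference can be sketched as follows, with heavily simplified stand-ins for the real `Codex`, `Session`, and `submission_loop` types:

```rust
use std::sync::Arc;

struct Config;
struct Session;

impl Session {
    /// Absorbs the setup work that previously ran when Op::ConfigureSession
    /// was handled inside submission_loop().
    fn new(_config: Config) -> Session {
        Session
    }
}

/// After the change, the loop receives Arc<Session> by construction, so there
/// is no `mut Option<Arc<Session>>` and no send_no_session_event path.
fn submission_loop(session: Arc<Session>) {
    let _ = session;
}

fn spawn(config: Config) {
    let session = Arc::new(Session::new(config));
    submission_loop(session);
}
```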
2025-08-14 09:55:28 -07:00
use crate::ModelProviderInfo;
fix: tighten up checks against writable folders for SandboxPolicy (#2338)
I was looking at the implementation of `Session::get_writable_roots()`,
which did not seem right, as it was a copy of writable roots, which is
not guaranteed to be in sync with the `sandbox_policy` field.
I looked at who was calling `get_writable_roots()` and its only call
site was `apply_patch()` in `codex-rs/core/src/apply_patch.rs`, which
took the roots and forwarded them to `assess_patch_safety()` in
`safety.rs`. I updated `assess_patch_safety()` to take `sandbox_policy:
&SandboxPolicy` instead of `writable_roots: &[PathBuf]` (and replaced
`Session::get_writable_roots()` with `Session::get_sandbox_policy()`).
Within `safety.rs`, it was fairly easy to update
`is_write_patch_constrained_to_writable_paths()` to work with
`SandboxPolicy`, and in particular, it is far more accurate because, for
better or worse, `SandboxPolicy::get_writable_roots_with_cwd()` _returns
an empty vec_ for `SandboxPolicy::DangerFullAccess`, suggesting that
_nothing_ is writable when in reality _everything_ is writable. With
this PR, `is_write_patch_constrained_to_writable_paths()` now does the
right thing for each variant of `SandboxPolicy`.
I thought this would be the end of the story, but it turned out that
`test_writable_roots_constraint()` in `safety.rs` needed to be updated,
as well. In particular, the test was writing to
`std::env::current_dir()` instead of a `TempDir`, which I suspect was a
holdover from earlier when `SandboxPolicy::WorkspaceWrite` would always
make `TMPDIR` writable on macOS, which made it hard to write tests to
verify `SandboxPolicy` in `TMPDIR`. Fortunately, we now have
`exclude_tmpdir_env_var` as an option on
`SandboxPolicy::WorkspaceWrite`, so I was able to update the test to
preserve the existing behavior, but to no longer write to
`std::env::current_dir()`.
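In sketch form, the per-variant behavior now looks something like this (a simplified stand-in for the real `SandboxPolicy` and check):

```rust
use std::path::PathBuf;

enum SandboxPolicy {
    DangerFullAccess,
    ReadOnly,
    WorkspaceWrite { writable_roots: Vec<PathBuf> },
}

fn is_write_patch_constrained(paths: &[PathBuf], policy: &SandboxPolicy) -> bool {
    match policy {
        // Everything is writable, so any patch is in bounds.
        SandboxPolicy::DangerFullAccess => true,
        // Nothing is writable; a write patch can never be auto-approved.
        SandboxPolicy::ReadOnly => false,
        // Every touched path must sit under some writable root.
        SandboxPolicy::WorkspaceWrite { writable_roots } => paths
            .iter()
            .all(|p| writable_roots.iter().any(|root| p.starts_with(root))),
    }
}
```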
---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed
with [ReviewStack](https://reviewstack.dev/openai/codex/pull/2338).
* #2345
* #2329
* #2343
* #2340
* __->__ #2338
2025-08-15 09:06:15 -07:00
use crate::apply_patch;
fix: ensure PatchApplyBeginEvent and PatchApplyEndEvent are dispatched reliably (#1760)
This is a follow-up to https://github.com/openai/codex/pull/1705, as
that PR inadvertently lost the logic where `PatchApplyBeginEvent` and
`PatchApplyEndEvent` events were sent when patches were auto-approved.
Though as part of this fix, I believe this also makes an important
safety fix to `assess_patch_safety()`, as there was a case that returned
`SandboxType::None`, which arguably is the thing we were trying to avoid
in #1705.
On a high level, we want there to be only one codepath where
`apply_patch` happens, which should be unified with the patch to run
`exec`, in general, so that sandboxing is applied consistently for both
cases.
Prior to this change, `apply_patch()` in `core` would either:
* exit early, delegating to `exec()` to shell out to `apply_patch` using
the appropriate sandbox
* proceed to run the logic for `apply_patch` in memory
https://github.com/openai/codex/blob/549846b29ad52f6cb4f8560365a731966054a9b3/codex-rs/core/src/apply_patch.rs#L61-L63
In this implementation, only the latter would dispatch
`PatchApplyBeginEvent` and `PatchApplyEndEvent`, though the former would
dispatch `ExecCommandBeginEvent` and `ExecCommandEndEvent` for the
`apply_patch` call (or, more specifically, the `codex
--codex-run-as-apply-patch PATCH` call).
To unify things in this PR, we:
* Eliminate the back half of the `apply_patch()` function, and instead
have it also return with `DelegateToExec`, though we add an extra field
to the return value, `user_explicitly_approved_this_action`.
* In `codex.rs` where we process `DelegateToExec`, we use
`SandboxType::None` when `user_explicitly_approved_this_action` is
`true`. This means **we no longer run the apply_patch logic in memory**,
as we always `exec()`. (Note this is what allowed us to delete so much
code in `apply_patch.rs`.)
* In `codex.rs`, we further update `notify_exec_command_begin()` and
`notify_exec_command_end()` to take additional fields to determine what
type of notification to send: `ExecCommand` or `PatchApply`.
Admittedly, this PR also drops some of the functionality about giving
the user the opportunity to expand the set of writable roots as part of
approving the `apply_patch` command. I'm not sure how much that was
used, and we should probably rethink how that works as we are currently
tidying up the protocol to the TUI, in general.
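The resulting dispatch rule is easy to state in code (variants simplified; the real `SandboxType` has platform-specific members):

```rust
enum SandboxType {
    None,       // run unsandboxed
    Configured, // stand-in for the platform sandbox (Seatbelt, Landlock, ...)
}

/// Every apply_patch now funnels through exec(); the only question is which
/// sandbox the child process gets.
fn sandbox_for_apply_patch(user_explicitly_approved_this_action: bool) -> SandboxType {
    if user_explicitly_approved_this_action {
        // The user already said yes, so the sandbox must not veto the write.
        SandboxType::None
    } else {
        SandboxType::Configured
    }
}
```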
2025-07-31 11:13:57 -07:00
use crate::apply_patch::ApplyPatchExec;
fix: run apply_patch calls through the sandbox (#1705)
Building on the work of https://github.com/openai/codex/pull/1702, this
changes how a shell call to `apply_patch` is handled.
Previously, a shell call to `apply_patch` was always handled in-process,
never leveraging a sandbox. To determine whether the `apply_patch`
operation could be auto-approved, the
`is_write_patch_constrained_to_writable_paths()` function would check if
all the paths listed in the paths were writable. If so, the agent would
apply the changes listed in the patch.
Unfortunately, this approach afforded a loophole: symlinks!
* For a soft link, we could fix this issue by tracing the link and
checking whether the target is in the set of writable paths, however...
* ...For a hard link, things are not as simple. We can run `stat FILE`
to see if the number of links is greater than 1, but then we would have
to do something potentially expensive like `find . -inum <inode_number>`
to find the other paths for `FILE`. Further, even if this worked, this
approach runs the risk of a
[TOCTOU](https://en.wikipedia.org/wiki/Time-of-check_to_time-of-use)
race condition, so it is not robust.
The solution, implemented in this PR, is to turn the virtual execution of the `apply_patch` CLI into an _actual_ execution using `codex --codex-run-as-apply-patch PATCH`, which we can run under the sandbox the user specified, just like any other `shell` call.
This, of course, assumes that the sandbox prevents writing through
symlinks as a mechanism to write to folders that are not in the writable
set configured by the sandbox. I verified this by testing the following
on both Mac and Linux:
```shell
#!/usr/bin/env bash
set -euo pipefail

# Can running a command in SANDBOX_DIR write a file in EXPLOIT_DIR?
# Codex is run in SANDBOX_DIR, so writes should be constrained to this directory.
SANDBOX_DIR=$(mktemp -d -p "$HOME" sandboxtesttemp.XXXXXX)

# EXPLOIT_DIR is outside of SANDBOX_DIR, so let's see if we can write to it.
EXPLOIT_DIR=$(mktemp -d -p "$HOME" sandboxtesttemp.XXXXXX)

echo "SANDBOX_DIR: $SANDBOX_DIR"
echo "EXPLOIT_DIR: $EXPLOIT_DIR"

cleanup() {
  # Only remove if it looks sane and still exists
  [[ -n "${SANDBOX_DIR:-}" && -d "$SANDBOX_DIR" ]] && rm -rf -- "$SANDBOX_DIR"
  [[ -n "${EXPLOIT_DIR:-}" && -d "$EXPLOIT_DIR" ]] && rm -rf -- "$EXPLOIT_DIR"
}
trap cleanup EXIT

echo "I am the original content" > "${EXPLOIT_DIR}/original.txt"

# Drop the -s to test hard links.
ln -s "${EXPLOIT_DIR}/original.txt" "${SANDBOX_DIR}/link-to-original.txt"
cat "${SANDBOX_DIR}/link-to-original.txt"

if [[ "$(uname)" == "Linux" ]]; then
  SANDBOX_SUBCOMMAND=landlock
else
  SANDBOX_SUBCOMMAND=seatbelt
fi

# Attempt the exploit
cd "${SANDBOX_DIR}"
codex debug "${SANDBOX_SUBCOMMAND}" bash -lc "echo pwned > ./link-to-original.txt" || true
cat "${EXPLOIT_DIR}/original.txt"
```
Admittedly, this change merits a proper integration test, but I think I
will have to do that in a follow-up PR.
2025-07-30 16:45:08 -07:00
use crate::apply_patch::CODEX_APPLY_PATCH_ARG1;
use crate::apply_patch::InternalApplyPatchInvocation;
2025-07-28 09:51:22 -07:00
use crate::apply_patch::convert_apply_patch_to_protocol;
use crate::client::ModelClient;
2025-05-08 21:46:06 -07:00
use crate::client_common::Prompt;
use crate::client_common::ResponseEvent;
feat: support mcp_servers in config.toml (#829)
This adds initial support for MCP servers in the style of Claude Desktop
and Cursor. Note this PR is the bare minimum to get things working end
to end: all configured MCP servers are launched every time Codex is run,
there is no recovery for MCP servers that crash, etc.
(Also, I took some shortcuts to change some fields of `Session` to be
`pub(crate)`, which also means there are circular deps between
`codex.rs` and `mcp_tool_call.rs`, but I will clean that up in a
subsequent PR.)
`codex-rs/README.md` is updated as part of this PR to explain how to use
this feature. There is a bit of plumbing to route the new settings from
`Config` to the business logic in `codex.rs`. The most significant
chunks for new code are in `mcp_connection_manager.rs` (which defines
the `McpConnectionManager` struct) and `mcp_tool_call.rs`, which is
responsible for tool calls.
This PR also introduces new `McpToolCallBegin` and `McpToolCallEnd`
event types to the protocol, but does not add any handlers for them.
(See https://github.com/openai/codex/pull/836 for initial usage.)
To test, I added the following to my `~/.codex/config.toml`:
```toml
# Local build of https://github.com/hideya/mcp-server-weather-js
[mcp_servers.weather]
command = "/Users/mbolin/code/mcp-server-weather-js/dist/index.js"
args = []
```
And then I ran the following:
```
codex-rs$ cargo run --bin codex exec 'what is the weather in san francisco'
[2025-05-06T22:40:05] Task started: 1
[2025-05-06T22:40:18] Agent message: Here’s the latest National Weather Service forecast for San Francisco (downtown, near 37.77° N, 122.42° W):
This Afternoon (Tue):
• Sunny, high near 69 °F
• West-southwest wind around 12 mph
Tonight:
• Partly cloudy, low around 52 °F
• SW wind 7–10 mph
...
```
Note that Codex itself is not able to make network calls, so it would
not normally be able to get live weather information like this. However,
the weather MCP is [currently] not run under the Codex sandbox, so it is
able to hit `api.weather.gov` and fetch current weather information.
---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed
with [ReviewStack](https://reviewstack.dev/openai/codex/pull/829).
* #836
* __->__ #829
2025-05-06 15:47:59 -07:00
use crate::config::Config;
feat: introduce support for shell_environment_policy in config.toml (#1061)
To date, when handling `shell` and `local_shell` tool calls, we were
spawning new processes using the environment inherited from the Codex
process itself. This means that the sensitive `OPENAI_API_KEY` that
Codex needs to talk to OpenAI models was made available to everything
run by `shell` and `local_shell`. While there are cases where that might
be useful, it does not seem like a good default.
This PR introduces a complex `shell_environment_policy` config option to control the `env` used with these tool calls. It is inevitably a bit complex so that it is possible to override individual components of the policy without having to restate the entire thing.
Details are in the updated `README.md` in this PR, but here is the
relevant bit that explains the individual fields of
`shell_environment_policy`:
| Field | Type | Default | Description |
| --- | --- | --- | --- |
| `inherit` | string | `core` | Starting template for the environment:<br>`core` (`HOME`, `PATH`, `USER`, …), `all` (clone full parent env), or `none` (start empty). |
| `ignore_default_excludes` | boolean | `false` | When `false`, Codex removes any var whose **name** contains `KEY`, `SECRET`, or `TOKEN` (case-insensitive) before other rules run. |
| `exclude` | array&lt;string&gt; | `[]` | Case-insensitive glob patterns to drop after the default filter.<br>Examples: `"AWS_*"`, `"AZURE_*"`. |
| `set` | table&lt;string,string&gt; | `{}` | Explicit key/value overrides or additions – always win over inherited values. |
| `include_only` | array&lt;string&gt; | `[]` | If non-empty, a whitelist of patterns; only variables that match _one_ pattern survive the final step. (Generally used with `inherit = "all"`.) |
In particular, note that the default is `inherit = "core"`, so:
* if you have extra env variables that you want to inherit from the
parent process, use `inherit = "all"` and then specify `include_only`
* if you have extra env variables where you want to hardcode the values,
the default `inherit = "core"` will work fine, but then you need to
specify `set`
This configuration is not battle-tested, so we will probably still have
to play with it a bit. `core/src/exec_env.rs` has the critical business
logic as well as unit tests.
Though if nothing else, previous to this change:
```
$ cargo run --bin codex -- debug seatbelt -- printenv OPENAI_API_KEY
# ...prints OPENAI_API_KEY...
```
But after this change it does not print anything (as desired).
One final thing to call out about this PR is that the
`configure_command!` macro we use in `core/src/exec.rs` has to do some
complex logic with respect to how it builds up the `env` for the process
being spawned under Landlock/seccomp. Specifically, doing
`cmd.env_clear()` followed by `cmd.envs(&$env_map)` (which is arguably
the most intuitive way to do it) caused the Landlock unit tests to fail
because the processes spawned by the unit tests started failing in
unexpected ways! If we forgo `env_clear()` in favor of updating env vars
one at a time, the tests still pass. The comment in the code talks about
this a bit, and while I would like to investigate this more, I need to
move on for the moment, but I do plan to come back to it to fully
understand what is going on. For example, this suggests that we might
not be able to spawn a C program that calls `env_clear()`, which would
be...weird. We may still have to fiddle with our Landlock config if that
is the case.
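To make the pipeline in the table concrete, here is a partial sketch of the filtering order (default excludes first, then `set` overrides; the glob-based `exclude`/`include_only` steps are omitted). Names are illustrative; the real logic lives in `core/src/exec_env.rs`:

```rust
use std::collections::HashMap;

fn matches_default_excludes(name: &str) -> bool {
    let upper = name.to_uppercase();
    ["KEY", "SECRET", "TOKEN"].iter().any(|s| upper.contains(s))
}

fn apply_policy(
    inherited: HashMap<String, String>,
    ignore_default_excludes: bool,
    set: &HashMap<String, String>,
) -> HashMap<String, String> {
    // Step 1: drop vars whose names look sensitive, unless opted out.
    let mut env: HashMap<String, String> = inherited
        .into_iter()
        .filter(|(name, _)| ignore_default_excludes || !matches_default_excludes(name))
        .collect();
    // Step 2: explicit `set` entries always win over inherited values.
    env.extend(set.iter().map(|(k, v)| (k.clone(), v.clone())));
    env
}
```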
2025-05-22 09:51:19 -07:00
use crate::config_types::ShellEnvironmentPolicy;
2025-05-08 21:46:06 -07:00
use crate::conversation_history::ConversationHistory;
2025-08-14 14:51:13 -04:00
use crate::environment_context::EnvironmentContext;
use crate::error::CodexErr;
use crate::error::Result as CodexResult;
2025-05-16 14:17:10 -07:00
use crate::error::SandboxErr;
2025-08-06 23:25:56 -07:00
use crate::error::get_error_message_ui;
use crate::exec::ExecParams;
use crate::exec::ExecToolCallOutput;
use crate::exec::SandboxType;
2025-08-01 13:04:34 -07:00
use crate::exec::StdoutStream;
2025-08-11 11:52:05 -07:00
use crate::exec::StreamOutput;
2025-05-07 08:37:48 -07:00
use crate::exec::process_exec_tool_call;
2025-08-22 18:10:55 -07:00
use crate::exec_command::EXEC_COMMAND_TOOL_NAME;
use crate::exec_command::ExecCommandParams;
2025-08-24 22:52:49 -07:00
use crate::exec_command::ExecSessionManager;
2025-08-22 18:10:55 -07:00
use crate::exec_command::WRITE_STDIN_TOOL_NAME;
use crate::exec_command::WriteStdinParams;
use crate::exec_env::create_env;
use crate::mcp_connection_manager::McpConnectionManager;
use crate::mcp_tool_call::handle_mcp_tool_call;
2025-08-15 09:56:05 -07:00
use crate::model_family::find_family_for_model;
2025-08-27 00:04:21 -07:00
use crate::openai_model_info::get_model_info;
2025-08-15 11:55:53 -04:00
use crate::openai_tools::ApplyPatchToolArgs;
2025-08-05 19:27:52 -07:00
use crate::openai_tools::ToolsConfig;
2025-08-24 22:43:42 -07:00
use crate::openai_tools::ToolsConfigParams;
2025-08-05 19:27:52 -07:00
use crate::openai_tools::get_openai_tools;
2025-08-11 11:26:15 -07:00
use crate::parse_command::parse_command;
2025-07-29 11:22:02 -07:00
use crate::plan_tool::handle_update_plan;
2025-06-03 09:40:19 -07:00
use crate::project_doc::get_user_instructions;
2025-07-16 15:11:18 -07:00
use crate::protocol::AgentMessageDeltaEvent;
use crate::protocol::AgentReasoningDeltaEvent;
2025-08-05 01:56:13 -07:00
use crate::protocol::AgentReasoningRawContentDeltaEvent;
2025-08-12 17:37:28 -07:00
use crate::protocol::AgentReasoningSectionBreakEvent;
2025-05-13 20:44:42 -07:00
use crate::protocol::ApplyPatchApprovalRequestEvent;
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
2025-04-24 13:31:40 -07:00
use crate::protocol::AskForApproval;
2025-05-13 20:44:42 -07:00
use crate::protocol::BackgroundEventEvent;
use crate::protocol::ErrorEvent;
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
2025-04-24 13:31:40 -07:00
use crate::protocol::Event;
use crate::protocol::EventMsg;
2025-05-13 20:44:42 -07:00
use crate::protocol::ExecApprovalRequestEvent;
use crate::protocol::ExecCommandBeginEvent;
use crate::protocol::ExecCommandEndEvent;
fix: ensure PatchApplyBeginEvent and PatchApplyEndEvent are dispatched reliably (#1760)
This is a follow-up to https://github.com/openai/codex/pull/1705, as
that PR inadvertently lost the logic where `PatchApplyBeginEvent` and
`PatchApplyEndEvent` events were sent when patches were auto-approved.
As part of this fix, I believe we also make an important safety fix to
`assess_patch_safety()`, as there was a case that returned
`SandboxType::None`, which is arguably the thing we were trying to avoid
in #1705.
At a high level, we want there to be only one codepath where
`apply_patch` happens, unified with the path used to run `exec` in
general, so that sandboxing is applied consistently in both cases.
Prior to this change, `apply_patch()` in `core` would either:
* exit early, delegating to `exec()` to shell out to `apply_patch` using
the appropriate sandbox
* proceed to run the logic for `apply_patch` in memory
https://github.com/openai/codex/blob/549846b29ad52f6cb4f8560365a731966054a9b3/codex-rs/core/src/apply_patch.rs#L61-L63
In this implementation, only the latter would dispatch
`PatchApplyBeginEvent` and `PatchApplyEndEvent`, though the former would
dispatch `ExecCommandBeginEvent` and `ExecCommandEndEvent` for the
`apply_patch` call (or, more specifically, the `codex
--codex-run-as-apply-patch PATCH` call).
To unify things in this PR, we:
* Eliminate the back half of the `apply_patch()` function, and instead
have it also return with `DelegateToExec`, though we add an extra field
to the return value, `user_explicitly_approved_this_action`.
* In `codex.rs` where we process `DelegateToExec`, we use
`SandboxType::None` when `user_explicitly_approved_this_action` is
`true`. This means **we no longer run the apply_patch logic in memory**,
as we always `exec()`. (Note this is what allowed us to delete so much
code in `apply_patch.rs`.)
* In `codex.rs`, we further update `notify_exec_command_begin()` and
`notify_exec_command_end()` to take additional fields to determine what
type of notification to send: `ExecCommand` or `PatchApply`.
Admittedly, this PR also drops some of the functionality about giving
the user the opportunity to expand the set of writable roots as part of
approving the `apply_patch` command. I'm not sure how much that was
used, and we should probably rethink how that works as we are currently
tidying up the protocol to the TUI, in general.
2025-07-31 11:13:57 -07:00
use crate::protocol::FileChange;
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
2025-04-24 13:31:40 -07:00
use crate::protocol::InputItem;
2025-08-28 19:16:39 -07:00
use crate::protocol::ListCustomPromptsResponseEvent;
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
2025-04-24 13:31:40 -07:00
use crate::protocol::Op;
fix: ensure PatchApplyBeginEvent and PatchApplyEndEvent are dispatched reliably (#1760)
2025-07-31 11:13:57 -07:00
use crate::protocol::PatchApplyBeginEvent;
use crate::protocol::PatchApplyEndEvent;
2025-09-23 15:56:34 -07:00
use crate::protocol::RateLimitSnapshot;
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
2025-04-24 13:31:40 -07:00
use crate::protocol::ReviewDecision;
Review Mode (Core) (#3401)
## 📝 Review Mode -- Core
This PR introduces the Core implementation for Review mode:
- New op `Op::Review { prompt: String }`: spawns a child review task
with isolated context, a review-specific system prompt, and a
`Config.review_model`.
- `EnteredReviewMode`: emitted when the child review session starts.
Every event from this point onwards reflects the review session.
- `ExitedReviewMode(Option<ReviewOutputEvent>)`: emitted when the review
finishes or is interrupted, with optional structured findings:
```json
{
"findings": [
{
"title": "<≤ 80 chars, imperative>",
"body": "<valid Markdown explaining *why* this is a problem; cite files/lines/functions>",
"confidence_score": <float 0.0-1.0>,
"priority": <int 0-3>,
"code_location": {
"absolute_file_path": "<file path>",
"line_range": {"start": <int>, "end": <int>}
}
}
],
"overall_correctness": "patch is correct" | "patch is incorrect",
"overall_explanation": "<1-3 sentence explanation justifying the overall_correctness verdict>",
"overall_confidence_score": <float 0.0-1.0>
}
```
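For illustration, here is a sketch of serde types that would deserialize this payload; the names mirror the JSON fields above and are assumptions, not necessarily the crate's actual definitions:
```rust
use serde::Deserialize;

/// Illustrative types matching the review-output schema above.
#[derive(Deserialize)]
struct ReviewOutput {
    findings: Vec<Finding>,
    overall_correctness: String, // "patch is correct" | "patch is incorrect"
    overall_explanation: String,
    overall_confidence_score: f32,
}

#[derive(Deserialize)]
struct Finding {
    title: String,
    body: String,
    confidence_score: f32,
    priority: u8, // 0-3
    code_location: CodeLocation,
}

#[derive(Deserialize)]
struct CodeLocation {
    absolute_file_path: String,
    line_range: LineRange,
}

#[derive(Deserialize)]
struct LineRange {
    start: u32,
    end: u32,
}
```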
## Questions
### Why separate out its own message history?
We want the review thread to match the training of our review models as
much as possible -- that means using a custom prompt, removing user
instructions, and starting a clean chat history.
We also want to make sure the review thread doesn't leak into the parent
thread.
### Why do this as a mode, vs. sub-agents?
1. We want review to be a synchronous task, so it's fine for now to do a
bespoke implementation.
2. We're still unclear about the final structure for sub-agents. We'd
prefer to land this quickly and then refactor into sub-agents without
rushing that implementation.
2025-09-12 16:25:10 -07:00
use crate::protocol::ReviewOutputEvent;
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
2025-04-24 13:31:40 -07:00
use crate::protocol::SandboxPolicy;
2025-05-13 19:22:16 -07:00
use crate::protocol::SessionConfiguredEvent;
2025-08-21 01:15:24 -07:00
use crate::protocol::StreamErrorEvent;
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
2025-04-24 13:31:40 -07:00
use crate::protocol::Submission;
2025-09-20 21:26:16 -07:00
use crate::protocol::TokenCountEvent;
2025-09-12 13:07:10 -07:00
use crate::protocol::TokenUsage;
2025-08-04 08:57:04 -07:00
use crate::protocol::TurnDiffEvent;
2025-08-23 22:58:56 -07:00
use crate::protocol::WebSearchBeginEvent;
2025-05-07 13:49:15 -07:00
use crate::rollout::RolloutRecorder;
2025-09-08 14:54:47 -07:00
use crate::rollout::RolloutRecorderParams;
2025-05-07 08:37:48 -07:00
use crate::safety::SafetyCheck;
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
2025-04-24 13:31:40 -07:00
use crate::safety::assess_command_safety;
fix: run apply_patch calls through the sandbox (#1705)
Building on the work of https://github.com/openai/codex/pull/1702, this
changes how a shell call to `apply_patch` is handled.
Previously, a shell call to `apply_patch` was always handled in-process,
never leveraging a sandbox. To determine whether the `apply_patch`
operation could be auto-approved, the
`is_write_patch_constrained_to_writable_paths()` function would check
whether all the paths listed in the patch were writable. If so, the
agent would apply the changes listed in the patch.
Unfortunately, this approach afforded a loophole: symlinks!
* For a soft link, we could fix this issue by tracing the link and
checking whether the target is in the set of writable paths, however...
* ...For a hard link, things are not as simple. We can run `stat FILE`
to see if the number of links is greater than 1, but then we would have
to do something potentially expensive like `find . -inum <inode_number>`
to find the other paths for `FILE`. Further, even if this worked, this
approach runs the risk of a
[TOCTOU](https://en.wikipedia.org/wiki/Time-of-check_to_time-of-use)
race condition, so it is not robust.
The solution, implemented in this PR, is to turn the virtual execution
of the `apply_patch` CLI into an _actual_ execution using `codex
--codex-run-as-apply-patch PATCH`, which we can run under the sandbox
the user specified, just like any other `shell` call.
This, of course, assumes that the sandbox prevents writing through
symlinks as a mechanism to write to folders that are not in the writable
set configured by the sandbox. I verified this by testing the following
on both Mac and Linux:
```shell
#!/usr/bin/env bash
set -euo pipefail

# Can running a command in SANDBOX_DIR write a file in EXPLOIT_DIR?
# Codex is run in SANDBOX_DIR, so writes should be constrained to this directory.
SANDBOX_DIR=$(mktemp -d -p "$HOME" sandboxtesttemp.XXXXXX)
# EXPLOIT_DIR is outside of SANDBOX_DIR, so let's see if we can write to it.
EXPLOIT_DIR=$(mktemp -d -p "$HOME" sandboxtesttemp.XXXXXX)
echo "SANDBOX_DIR: $SANDBOX_DIR"
echo "EXPLOIT_DIR: $EXPLOIT_DIR"

cleanup() {
    # Only remove if it looks sane and still exists
    [[ -n "${SANDBOX_DIR:-}" && -d "$SANDBOX_DIR" ]] && rm -rf -- "$SANDBOX_DIR"
    [[ -n "${EXPLOIT_DIR:-}" && -d "$EXPLOIT_DIR" ]] && rm -rf -- "$EXPLOIT_DIR"
}
trap cleanup EXIT

echo "I am the original content" > "${EXPLOIT_DIR}/original.txt"
# Drop the -s to test hard links.
ln -s "${EXPLOIT_DIR}/original.txt" "${SANDBOX_DIR}/link-to-original.txt"
cat "${SANDBOX_DIR}/link-to-original.txt"

if [[ "$(uname)" == "Linux" ]]; then
    SANDBOX_SUBCOMMAND=landlock
else
    SANDBOX_SUBCOMMAND=seatbelt
fi

# Attempt the exploit
cd "${SANDBOX_DIR}"
codex debug "${SANDBOX_SUBCOMMAND}" bash -lc "echo pwned > ./link-to-original.txt" || true
cat "${EXPLOIT_DIR}/original.txt"
```
Admittedly, this change merits a proper integration test, but I think I
will have to do that in a follow-up PR.
2025-07-30 16:45:08 -07:00
use crate::safety::assess_safety_for_untrusted_command;
2025-07-25 11:45:23 -07:00
use crate::shell;
ref: full state refactor (#4174)
## Current State Observations
- `Session` currently holds many unrelated responsibilities (history,
approval queues, task handles, rollout recorder, shell discovery, token
tracking, etc.), making it hard to reason about ownership and lifetimes.
- The anonymous `State` struct inside `codex.rs` mixes session-long data
with turn-scoped queues and approval bookkeeping.
- Turn execution (`run_task`) relies on ad-hoc local variables that
should conceptually belong to a per-turn state object.
- External modules (`codex::compact`, tests) frequently poke the raw
`Session.state` mutex, which couples them to implementation details.
- Interrupts, approvals, and rollout persistence all have bespoke
cleanup paths, contributing to subtle bugs when a turn is aborted
mid-flight.
## Desired End State
- Keep a slim `Session` object that acts as the orchestrator and façade.
It should expose a focused API (submit, approvals, interrupts, event
emission) without storing unrelated fields directly.
- Introduce a `state` module that encapsulates all mutable data
structures:
- `SessionState`: session-persistent data (history, approved commands,
token/rate-limit info, maybe user preferences).
- `ActiveTurn`: metadata for the currently running turn (sub-id, task
kind, abort handle) and an `Arc<TurnState>`.
- `TurnState`: all turn-scoped pieces (pending inputs, approval waiters,
diff tracker, review history, auto-compact flags, last agent message,
outstanding tool call bookkeeping).
- Group long-lived helpers/managers into a dedicated `SessionServices`
struct so `Session` does not accumulate "random" fields.
- Provide clear, lock-safe APIs so other modules never touch raw
mutexes.
- Ensure every turn creates/drops a `TurnState` and that
interrupts/finishes delegate cleanup to it (see the sketch after this list).
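As a rough illustration of the proposed split, here is a minimal sketch; the field types are assumptions for illustration, not the final API:
```rust
use std::collections::HashSet;
use std::sync::Arc;

/// Session-persistent data (illustrative fields).
struct SessionState {
    history: Vec<String>,                    // recorded conversation items
    approved_commands: HashSet<Vec<String>>, // commands approved for the whole session
}

/// Turn-scoped bookkeeping, created when a turn starts and dropped when it ends.
struct TurnState {
    pending_input: Vec<String>,         // queued user input for this turn
    last_agent_message: Option<String>, // most recent assistant message
}

/// Metadata for the currently running turn.
struct ActiveTurn {
    sub_id: String,             // submission id that started this turn
    turn_state: Arc<TurnState>, // shared turn-scoped state
}
```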
2025-09-25 11:16:06 +01:00
use crate::state::ActiveTurn;
use crate::state::SessionServices;
2025-09-26 15:49:08 +02:00
use crate::tasks::CompactTask;
use crate::tasks::RegularTask;
use crate::tasks::ReviewTask;
2025-08-04 08:57:04 -07:00
use crate::turn_diff_tracker::TurnDiffTracker;
Unified execution (#3288)
## Unified PTY-Based Exec Tool
Note: this requires setting the following flag in the config (see the
example after this list): `use_experimental_unified_exec_tool = true`
- Adds a PTY-backed interactive exec feature ("unified_exec") with
session reuse via `session_id`, bounded output (128 KiB), and timeout
clamping (≤ 60 s).
- Protocol: introduces `ResponseItem::UnifiedExec { session_id,
arguments, timeout_ms }`.
- Tools: exposes `unified_exec` as a function tool (Responses API);
excluded from the Chat Completions payload while still supported in
tool lists.
- Path handling: resolves commands via `PATH` (or explicit paths), with
UTF-8/newline-aware truncation (`truncate_middle`).
- Tests: cover command parsing, path resolution, session
persistence/cleanup, multi-session isolation, timeouts, and truncation
behavior.
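For reference, a sketch of the opt-in; its placement as a top-level key in `config.toml` is an assumption based on the note above:
```toml
# Assumed placement: top-level key in ~/.codex/config.toml.
use_experimental_unified_exec_tool = true
```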
2025-09-10 17:38:11 -07:00
use crate::unified_exec::UnifiedExecSessionManager;
2025-09-03 22:34:50 -07:00
use crate::user_instructions::UserInstructions;
feat: configurable notifications in the Rust CLI (#793)
With this change, you can specify a program that will be executed to get
notified about events generated by Codex. The notification info will be
packaged as a JSON object. The supported notification types are defined
by the `UserNotification` enum introduced in this PR. Initially, it
contains only one variant, `AgentTurnComplete`:
```rust
pub(crate) enum UserNotification {
    #[serde(rename_all = "kebab-case")]
    AgentTurnComplete {
        turn_id: String,
        /// Messages that the user sent to the agent to initiate the turn.
        input_messages: Vec<String>,
        /// The last message sent by the assistant in the turn.
        last_assistant_message: Option<String>,
    },
}
```
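For illustration, a payload passed to the notify program would look roughly like this; the `type` tag and kebab-case keys are assumptions inferred from the enum's serde attributes and the script below, and the values are made up:
```json
{
  "type": "agent-turn-complete",
  "turn-id": "turn-1",
  "input-messages": ["Rename `foo` to `bar` and update the callsites."],
  "last-assistant-message": "Done. `foo` is now `bar` everywhere."
}
```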
This is intended to support the common case when a "turn" ends, which
often means it is now your chance to give Codex further instructions.
For example, I have the following in my `~/.codex/config.toml`:
```toml
notify = ["python3", "/Users/mbolin/.codex/notify.py"]
```
I created my own custom notifier script that calls out to
[terminal-notifier](https://github.com/julienXX/terminal-notifier) to
show a desktop push notification on macOS. Contents of `notify.py`:
```python
#!/usr/bin/env python3
import json
import subprocess
import sys


def main() -> int:
    if len(sys.argv) != 2:
        print("Usage: notify.py <NOTIFICATION_JSON>")
        return 1

    try:
        notification = json.loads(sys.argv[1])
    except json.JSONDecodeError:
        return 1

    match notification_type := notification.get("type"):
        case "agent-turn-complete":
            assistant_message = notification.get("last-assistant-message")
            if assistant_message:
                title = f"Codex: {assistant_message}"
            else:
                title = "Codex: Turn Complete!"
            input_messages = notification.get("input_messages", [])
            message = " ".join(input_messages)
            title += message
        case _:
            print(f"not sending a push notification for: {notification_type}")
            return 0

    subprocess.check_output(
        [
            "terminal-notifier",
            "-title",
            title,
            "-message",
            message,
            "-group",
            "codex",
            "-ignoreDnD",
            "-activate",
            "com.googlecode.iterm2",
        ]
    )
    return 0


if __name__ == "__main__":
    sys.exit(main())
```
For reference, here are related PRs that tried to add this functionality
to the TypeScript version of the Codex CLI:
* https://github.com/openai/codex/pull/160
* https://github.com/openai/codex/pull/498
2025-05-02 19:48:13 -07:00
use crate::user_notification::UserNotification;
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
2025-04-24 13:31:40 -07:00
use crate::util::backoff;
OpenTelemetry events (#2103)
### Choosing an exporter
Set `otel.exporter` to control where events go:
- `none` – leaves instrumentation active but skips exporting. This is the default.
- `otlp-http` – posts OTLP log records to an OTLP/HTTP collector. Specify the endpoint, protocol, and headers your collector expects:
```toml
[otel]
exporter = { otlp-http = {
  endpoint = "https://otel.example.com/v1/logs",
  protocol = "binary",
  headers = { "x-otlp-api-key" = "${OTLP_TOKEN}" }
}}
```
- `otlp-grpc` – streams OTLP log records over gRPC. Provide the endpoint and any metadata headers:
```toml
[otel]
exporter = { otlp-grpc = {
  endpoint = "https://otel.example.com:4317",
  headers = { "x-otlp-meta" = "abc123" }
}}
```
If the exporter is `none`, nothing is written anywhere; otherwise you must run, or point to, your own collector. All exporters run on a background batch worker that is flushed on shutdown.
If you build Codex from source, the OTEL crate is still behind an `otel` feature flag; the official prebuilt binaries ship with the feature enabled. When the feature is disabled, the telemetry hooks become no-ops, so the CLI continues to function without the extra dependencies.
---------
Co-authored-by: Anton Panasenko <apanasenko@openai.com>
2025-09-29 19:30:55 +01:00
use codex_otel::otel_event_manager::OtelEventManager;
use codex_otel::otel_event_manager::ToolDecisionSource;
2025-08-18 11:50:17 -07:00
use codex_protocol::config_types::ReasoningEffort as ReasoningEffortConfig;
use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig;
2025-08-28 19:16:39 -07:00
use codex_protocol::custom_prompts::CustomPrompt;
2025-08-22 15:18:54 -07:00
use codex_protocol::models::ContentItem;
use codex_protocol::models::FunctionCallOutputPayload;
use codex_protocol::models::LocalShellAction;
use codex_protocol::models::ResponseInputItem;
use codex_protocol::models::ResponseItem;
use codex_protocol::models::ShellToolCallParams;
2025-09-10 10:17:24 -07:00
use codex_protocol::protocol::InitialHistory;
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
2025-04-24 13:31:40 -07:00
2025-09-18 13:55:53 -07:00
pub mod compact;
2025-09-14 09:23:31 -04:00
use self::compact::build_compacted_history;
use self::compact::collect_user_messages;
2025-09-12 13:07:10 -07:00
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
2025-04-24 13:31:40 -07:00
/// The high-level interface to the Codex system.
/// It operates as a queue pair where you send submissions and receive events.
pub struct Codex {
2025-05-07 16:33:28 -07:00
    next_id: AtomicU64,
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
2025-04-24 13:31:40 -07:00
    tx_sub: Sender<Submission>,
    rx_event: Receiver<Event>,
}
2025-07-27 20:01:35 -07:00
/// Wrapper returned by [`Codex::spawn`] containing the spawned [`Codex`] and
/// the id of the conversation it belongs to.
pub struct CodexSpawnOk {
    pub codex: Codex,
2025-09-07 20:22:25 -07:00
    pub conversation_id: ConversationId,
2025-07-27 20:01:35 -07:00
}
exploration: create Session as part of Codex::spawn() (#2291)
Historically, `Codex::spawn()` would create the instance of `Codex` and
enforce, by construction, that `Op::ConfigureSession` was the first `Op`
submitted via `submit()`. Then over in `submission_loop()`, it would
handle the case for taking the parameters of `Op::ConfigureSession` and
turning it into a `Session`.
This approach has two challenges from a state management perspective:
https://github.com/openai/codex/blob/f968a1327ad39a7786759ea8f1d1c088fe41e91b/codex-rs/core/src/codex.rs#L718
- The local `sess` variable in `submission_loop()` has to be `mut` and
`Option<Arc<Session>>` because it is not an invariant that a `Session`
is present for the lifetime of the loop, so there is a lot of logic to
deal with the case where `sess` is `None` (e.g., the
`send_no_session_event` function and all of its callsites).
- `submission_loop()` is written in such a way that
`Op::ConfigureSession` could be observed multiple times, but in
practice, it is only observed exactly once at the start of the loop.
In this PR, we try to simplify the state management by _removing_ the
`Op::ConfigureSession` enum variant and constructing the `Session` as
part of `Codex::spawn()` so that it can be passed to `submission_loop()`
as `Arc<Session>`. The original logic from the `Op::ConfigureSession`
has largely been moved to the new `Session::new()` constructor.
---
Incidentally, I also noticed that the handling of `Op::ConfigureSession`
can result in events being dispatched in addition to
`EventMsg::SessionConfigured`, as an `EventMsg::Error` is created for
every MCP initialization error, so it is important to preserve that
behavior:
https://github.com/openai/codex/blob/f968a1327ad39a7786759ea8f1d1c088fe41e91b/codex-rs/core/src/codex.rs#L901-L916
Though admittedly, I believe this does not play nice with #2264, as
these error messages will likely be dispatched before the client has a
chance to call `addConversationListener`, so we likely need to make it
so `newConversation` automatically creates the subscription, but we must
also guarantee that the "ack" from `newConversation` is returned before
any other conversation-related notifications are sent so the client
knows what `conversation_id` to match on.
2025-08-14 09:55:28 -07:00
pub(crate) const INITIAL_SUBMIT_ID: &str = "";
2025-08-23 09:54:31 -07:00
pub(crate) const SUBMISSION_CHANNEL_CAPACITY: usize = 64;

// Model-formatting limits: clients get full streams; only content sent to the model is truncated.
pub(crate) const MODEL_FORMAT_MAX_BYTES: usize = 10 * 1024; // 10 KiB
pub(crate) const MODEL_FORMAT_MAX_LINES: usize = 256; // lines
pub(crate) const MODEL_FORMAT_HEAD_LINES: usize = MODEL_FORMAT_MAX_LINES / 2;
pub(crate) const MODEL_FORMAT_TAIL_LINES: usize = MODEL_FORMAT_MAX_LINES - MODEL_FORMAT_HEAD_LINES; // 128
pub(crate) const MODEL_FORMAT_HEAD_BYTES: usize = MODEL_FORMAT_MAX_BYTES / 2;
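As an editorial aside, a minimal sketch of how these limits compose for head/tail truncation; the function name is hypothetical, and the real formatting code also enforces the byte budget with UTF-8-aware truncation:
```rust
/// Hypothetical sketch: keep the first MODEL_FORMAT_HEAD_LINES and last
/// MODEL_FORMAT_TAIL_LINES lines of oversized output.
fn truncate_for_model(text: &str) -> String {
    let lines: Vec<&str> = text.lines().collect();
    if lines.len() <= MODEL_FORMAT_MAX_LINES && text.len() <= MODEL_FORMAT_MAX_BYTES {
        return text.to_string();
    }
    let head_end = MODEL_FORMAT_HEAD_LINES.min(lines.len());
    let tail_start = lines.len().saturating_sub(MODEL_FORMAT_TAIL_LINES).max(head_end);
    format!(
        "{}\n[... output truncated ...]\n{}",
        lines[..head_end].join("\n"),
        lines[tail_start..].join("\n")
    )
}
```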
exploration: create Session as part of Codex::spawn() (#2291)
2025-08-14 09:55:28 -07:00
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
2025-04-24 13:31:40 -07:00
impl Codex {
2025-07-27 20:01:35 -07:00
    /// Spawn a new [`Codex`] and initialize the session.
2025-08-22 13:10:11 -07:00
    pub async fn spawn(
        config: Config,
        auth_manager: Arc<AuthManager>,
2025-09-02 15:44:29 -07:00
        conversation_history: InitialHistory,
2025-08-22 13:10:11 -07:00
    ) -> CodexResult<CodexSpawnOk> {
2025-08-23 09:54:31 -07:00
        let (tx_sub, rx_sub) = async_channel::bounded(SUBMISSION_CHANNEL_CAPACITY);
2025-08-04 21:23:22 -07:00
        let (tx_event, rx_event) = async_channel::unbounded();
2025-05-10 17:52:59 -07:00
2025-07-22 09:42:22 -07:00
        let user_instructions = get_user_instructions(&config).await;
exploration: create Session as part of Codex::spawn() (#2291)
2025-08-14 09:55:28 -07:00
        let config = Arc::new(config);
        let configure_session = ConfigureSession {
2025-05-07 17:38:28 -07:00
            provider: config.model_provider.clone(),
2025-05-07 16:33:28 -07:00
            model: config.model.clone(),
feat: make reasoning effort/summaries configurable (#1199)
Previous to this PR, we always set `reasoning` when making a request
using the Responses API:
https://github.com/openai/codex/blob/d7245cbbc9d8ff5446da45e5951761103492476d/codex-rs/core/src/client.rs#L108-L111
Though if you tried to use the Rust CLI with `--model gpt-4.1`, this
would fail with:
```shell
"Unsupported parameter: 'reasoning.effort' is not supported with this model."
```
We take a cue from the TypeScript CLI, which does a check on the model
name:
https://github.com/openai/codex/blob/d7245cbbc9d8ff5446da45e5951761103492476d/codex-cli/src/utils/agent/agent-loop.ts#L786-L789
This PR does a similar check, though also adds support for the following
config options:
```
model_reasoning_effort = "low" | "medium" | "high" | "none"
model_reasoning_summary = "auto" | "concise" | "detailed" | "none"
```
This way, if you have a model whose name happens to start with `"o"` (or
`"codex"`?), you can set these to `"none"` to explicitly disable
reasoning, if necessary. (That said, it seems unlikely anyone would use
the Responses API with non-OpenAI models, but we provide an escape
hatch, anyway.)
This PR also updates both the TUI and `codex exec` to show `reasoning
effort` and `reasoning summaries` in the header.
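For example, in `~/.codex/config.toml` (a sketch using values from the lists above):
```toml
model_reasoning_effort = "none"   # explicitly disable reasoning
model_reasoning_summary = "none"
```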
2025-06-02 16:01:34 -07:00
            model_reasoning_effort: config.model_reasoning_effort,
            model_reasoning_summary: config.model_reasoning_summary,
2025-07-22 09:42:22 -07:00
            user_instructions,
            base_instructions: config.base_instructions.clone(),
2025-05-07 16:33:28 -07:00
            approval_policy: config.approval_policy,
            sandbox_policy: config.sandbox_policy.clone(),
2025-09-23 07:25:46 -07:00
            notify: UserNotifier::new(config.notify.clone()),
2025-05-07 16:33:28 -07:00
            cwd: config.cwd.clone(),
        };
2025-07-21 21:01:56 -07:00
        // Generate a unique ID for the lifetime of this Codex session.
2025-08-22 13:10:11 -07:00
        let (session, turn_context) = Session::new(
            configure_session,
            config.clone(),
            auth_manager.clone(),
            tx_event.clone(),
2025-09-14 09:23:31 -04:00
            conversation_history,
2025-08-22 13:10:11 -07:00
        )
        .await
        .map_err(|e| {
            error!("Failed to create session: {e:#}");
            CodexErr::InternalAgentDied
        })?;
2025-09-07 20:22:25 -07:00
        let conversation_id = session.conversation_id;
chore: introduce ConversationManager as a clearinghouse for all conversations (#2240)
This PR does two things; after I got deep into the first one, I started
pulling on the thread that led to the second:
- Makes `ConversationManager` the place where all in-memory
conversations are created and stored. Previously, `MessageProcessor` in
the `codex-mcp-server` crate was doing this via its `session_map`, but
this is something that should be done in `codex-core`.
- It unwinds the `ctrl_c: tokio::sync::Notify` that was threaded
throughout our code. I think this made sense at one time, but now that
we handle Ctrl-C within the TUI and have a proper `Op::Interrupt` event,
I don't think this was quite right, so I removed it. For `codex exec`
and `codex proto`, we now use `tokio::signal::ctrl_c()` directly, but we
no longer make `Notify` a field of `Codex` or `CodexConversation`.
Changes of note:
- Adds the files `conversation_manager.rs` and `codex_conversation.rs`
to `codex-core`.
- `Codex` and `CodexSpawnOk` are no longer exported from `codex-core`:
other crates must use `CodexConversation` instead (which is created via
`ConversationManager`).
- `core/src/codex_wrapper.rs` has been deleted in favor of
`ConversationManager`.
- `ConversationManager::new_conversation()` returns `NewConversation`,
which is in line with the `new_conversation` tool we want to add to the
MCP server. Note `NewConversation` includes `SessionConfiguredEvent`, so
we eliminate checks in cases like `codex-rs/core/tests/client.rs` to
verify `SessionConfiguredEvent` is the first event because that is now
internal to `ConversationManager`.
- Quite a bit of code was deleted from
`codex-rs/mcp-server/src/message_processor.rs` since it no longer has to
manage multiple conversations itself: it goes through
`ConversationManager` instead.
- `core/tests/live_agent.rs` has been deleted because I had to update a
bunch of tests and all the tests in here were ignored, and I don't think
anyone ever ran them, so this was just technical debt, at this point.
- Removed `notify_on_sigint()` from `util.rs` (and in a follow-up, I
hope to refactor the blandly-named `util.rs` into more descriptive
files).
- In general, I started replacing local variables named `codex` as
`conversation`, where appropriate, though admittedly I didn't do it
through all the integration tests because that would have added a lot of
noise to this PR.
---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed
with [ReviewStack](https://reviewstack.dev/openai/codex/pull/2240).
* #2264
* #2263
* __->__ #2240
2025-08-13 13:38:18 -07:00
        // This task will run until Op::Shutdown is received.
2025-09-11 11:59:37 -07:00
        tokio::spawn(submission_loop(session, turn_context, config, rx_sub));
2025-05-07 16:33:28 -07:00
        let codex = Codex {
            next_id: AtomicU64::new(0),
            tx_sub,
            rx_event,
        };
2025-09-07 20:22:25 -07:00
        Ok(CodexSpawnOk {
            codex,
            conversation_id,
        })
2025-05-07 16:33:28 -07:00
    }

    /// Submit the `op` wrapped in a `Submission` with a unique ID.
    pub async fn submit(&self, op: Op) -> CodexResult<String> {
        let id = self
            .next_id
            .fetch_add(1, std::sync::atomic::Ordering::SeqCst)
            .to_string();
        let sub = Submission { id: id.clone(), op };
        self.submit_with_id(sub).await?;
        Ok(id)
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
2025-04-24 13:31:40 -07:00
}
2025-05-07 16:33:28 -07:00
/// Use sparingly: prefer `submit()` so Codex is responsible for generating
/// unique IDs for each submission.
pub async fn submit_with_id(&self, sub: Submission) -> CodexResult<()> {
2025-04-24 13:31:40 -07:00
    self.tx_sub
        .send(sub)
        .await
2025-05-07 16:33:28 -07:00
        .map_err(|_| CodexErr::InternalAgentDied)?;
    Ok(())
2025-04-24 13:31:40 -07:00
}
pub async fn next_event(&self) -> CodexResult<Event> {
    let event = self
        .rx_event
        .recv()
        .await
        .map_err(|_| CodexErr::InternalAgentDied)?;
    Ok(event)
}
}
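Taken together, `submit()` and `next_event()` form the queue pair that clients drive: operations go in with a freshly generated submission ID, and events come back carrying the ID of the submission they answer. A minimal driver loop, assuming a `codex` handle obtained from `Codex::spawn()`:
```rust
// Sketch only: submit a single op, then drain events until ours is answered.
let sub_id = codex.submit(Op::Shutdown).await?;
loop {
    let event = codex.next_event().await?;
    // Each Event carries the ID of the submission it corresponds to.
    if event.id == sub_id {
        break;
    }
}
```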
ref: full state refactor (#4174)
## Current State Observations
- `Session` currently holds many unrelated responsibilities (history,
approval queues, task handles, rollout recorder, shell discovery, token
tracking, etc.), making it hard to reason about ownership and lifetimes.
- The anonymous `State` struct inside `codex.rs` mixes session-long data
with turn-scoped queues and approval bookkeeping.
- Turn execution (`run_task`) relies on ad-hoc local variables that
should conceptually belong to a per-turn state object.
- External modules (`codex::compact`, tests) frequently poke the raw
`Session.state` mutex, which couples them to implementation details.
- Interrupts, approvals, and rollout persistence all have bespoke
cleanup paths, contributing to subtle bugs when a turn is aborted
mid-flight.
## Desired End State
- Keep a slim `Session` object that acts as the orchestrator and façade.
It should expose a focused API (submit, approvals, interrupts, event
emission) without storing unrelated fields directly.
- Introduce a `state` module that encapsulates all mutable data
structures:
- `SessionState`: session-persistent data (history, approved commands,
token/rate-limit info, maybe user preferences).
- `ActiveTurn`: metadata for the currently running turn (sub-id, task
kind, abort handle) and an `Arc<TurnState>`.
- `TurnState`: all turn-scoped pieces (pending inputs, approval waiters,
diff tracker, review history, auto-compact flags, last agent message,
outstanding tool call bookkeeping).
- Group long-lived helpers/managers into a dedicated `SessionServices`
struct so `Session` does not accumulate "random" fields.
- Provide clear, lock-safe APIs so other modules never touch raw
mutexes.
- Ensure every turn creates/drops a `TurnState` and that
interrupts/finishes delegate cleanup to it.
2025-09-25 11:16:06 +01:00
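A rough sketch of the intended shape of the `state` module; the names follow the bullets above, but the field lists are a guess, not the final code:
```rust
// Illustrative only: session-long data vs. turn-scoped data split apart.
pub(crate) struct SessionState {
    history: Vec<ResponseItem>,             // conversation history
    approved_commands: HashSet<Vec<String>>,
    token_usage: Option<TokenUsage>,        // token/rate-limit info
}

pub(crate) struct ActiveTurn {
    sub_id: String,
    abort_handle: tokio::task::AbortHandle,
    turn_state: Arc<TurnState>,             // dropped when the turn ends
}
```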
use crate::state::SessionState;
2025-08-13 22:53:54 -07:00
2025-04-24 13:31:40 -07:00
/// Context for an initialized model agent
///
/// A session has at most one running task at a time and can be interrupted by user input.
feat: support mcp_servers in config.toml (#829)
This adds initial support for MCP servers in the style of Claude Desktop
and Cursor. Note this PR is the bare minimum to get things working end
to end: all configured MCP servers are launched every time Codex is run,
there is no recovery for MCP servers that crash, etc.
(Also, I took some shortcuts to change some fields of `Session` to be
`pub(crate)`, which also means there are circular deps between
`codex.rs` and `mcp_tool_call.rs`, but I will clean that up in a
subsequent PR.)
`codex-rs/README.md` is updated as part of this PR to explain how to use
this feature. There is a bit of plumbing to route the new settings from
`Config` to the business logic in `codex.rs`. The most significant
chunks for new code are in `mcp_connection_manager.rs` (which defines
the `McpConnectionManager` struct) and `mcp_tool_call.rs`, which is
responsible for tool calls.
This PR also introduces new `McpToolCallBegin` and `McpToolCallEnd`
event types to the protocol, but does not add any handlers for them.
(See https://github.com/openai/codex/pull/836 for initial usage.)
To test, I added the following to my `~/.codex/config.toml`:
```toml
# Local build of https://github.com/hideya/mcp-server-weather-js
[mcp_servers.weather]
command = "/Users/mbolin/code/mcp-server-weather-js/dist/index.js"
args = []
```
And then I ran the following:
```
codex-rs$ cargo run --bin codex exec 'what is the weather in san francisco'
[2025-05-06T22:40:05] Task started: 1
[2025-05-06T22:40:18] Agent message: Here’s the latest National Weather Service forecast for San Francisco (downtown, near 37.77° N, 122.42° W):
This Afternoon (Tue):
• Sunny, high near 69 °F
• West-southwest wind around 12 mph
Tonight:
• Partly cloudy, low around 52 °F
• SW wind 7–10 mph
...
```
Note that Codex itself is not able to make network calls, so it would
not normally be able to get live weather information like this. However,
the weather MCP is [currently] not run under the Codex sandbox, so it is
able to hit `api.weather.gov` and fetch current weather information.
---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed
with [ReviewStack](https://reviewstack.dev/openai/codex/pull/829).
* #836
* __->__ #829
2025-05-06 15:47:59 -07:00
pub(crate) struct Session {
2025-09-07 20:22:25 -07:00
    conversation_id: ConversationId,
2025-08-13 22:53:54 -07:00
    tx_event: Sender<Event>,
2025-09-25 11:16:06 +01:00
    state: Mutex<SessionState>,
2025-09-26 15:49:08 +02:00
    pub(crate) active_turn: Mutex<Option<ActiveTurn>>,
2025-09-25 11:16:06 +01:00
    services: SessionServices,
2025-09-18 18:21:52 +01:00
    next_internal_sub_id: AtomicU64,
2025-04-24 13:31:40 -07:00
}
2025-08-15 09:40:02 -07:00
/// The context needed for a single turn of the conversation.
#[derive(Debug)]
pub(crate) struct TurnContext {
    pub(crate) client: ModelClient,
    /// The session's current working directory. All relative paths provided by
    /// the model as well as sandbox policies are resolved against this path
    /// instead of `std::env::current_dir()`.
    pub(crate) cwd: PathBuf,
    pub(crate) base_instructions: Option<String>,
    pub(crate) user_instructions: Option<String>,
    pub(crate) approval_policy: AskForApproval,
    pub(crate) sandbox_policy: SandboxPolicy,
    pub(crate) shell_environment_policy: ShellEnvironmentPolicy,
    pub(crate) tools_config: ToolsConfig,
Review Mode (Core) (#3401)
## 📝 Review Mode -- Core
This PR introduces the Core implementation for Review mode:
- New op `Op::Review { prompt: String }`: spawns a child review task with an
isolated context, a review-specific system prompt, and `Config.review_model`.
- `EnteredReviewMode`: emitted when the child review session starts.
Every event from this point onwards reflects the review session.
- `ExitedReviewMode(Option<ReviewOutputEvent>)`: emitted when the review
finishes or is interrupted, with optional structured findings:
```json
{
"findings": [
{
"title": "<≤ 80 chars, imperative>",
"body": "<valid Markdown explaining *why* this is a problem; cite files/lines/functions>",
"confidence_score": <float 0.0-1.0>,
"priority": <int 0-3>,
"code_location": {
"absolute_file_path": "<file path>",
"line_range": {"start": <int>, "end": <int>}
}
}
],
"overall_correctness": "patch is correct" | "patch is incorrect",
"overall_explanation": "<1-3 sentence explanation justifying the overall_correctness verdict>",
"overall_confidence_score": <float 0.0-1.0>
}
```
## Questions
### Why separate out its own message history?
We want the review thread to match the training of our review models as
much as possible -- that means using a custom prompt, removing user
instructions, and starting a clean chat history.
We also want to make sure the review thread doesn't leak into the parent
thread.
### Why do this as a mode, vs. sub-agents?
1. We want review to be a synchronous task, so it's fine for now to do a
bespoke implementation.
2. We're still unclear about the final structure for sub-agents. We'd
prefer to land this quickly and then refactor into sub-agents without
rushing that implementation.
2025-09-12 16:25:10 -07:00
    pub(crate) is_review_mode: bool,
2025-09-23 13:59:16 -07:00
    pub(crate) final_output_json_schema: Option<Value>,
2025-08-15 09:40:02 -07:00
}
impl TurnContext {
    fn resolve_path(&self, path: Option<String>) -> PathBuf {
        path.as_ref()
            .map(PathBuf::from)
            .map_or_else(|| self.cwd.clone(), |p| self.cwd.join(p))
    }
}
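Since `Path::join` replaces the base entirely when handed an absolute path, `resolve_path` resolves relative paths against the session `cwd` while passing absolute paths through unchanged. For example (with a hypothetical `ctx` whose `cwd` is `/work`):
```rust
assert_eq!(ctx.resolve_path(None), PathBuf::from("/work"));
assert_eq!(ctx.resolve_path(Some("src/lib.rs".into())), PathBuf::from("/work/src/lib.rs"));
assert_eq!(ctx.resolve_path(Some("/etc/hosts".into())), PathBuf::from("/etc/hosts"));
```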
exploration: create Session as part of Codex::spawn() (#2291)
Historically, `Codex::spawn()` would create the instance of `Codex` and
enforce, by construction, that `Op::ConfigureSession` was the first `Op`
submitted via `submit()`. Then over in `submission_loop()`, it would
handle the case for taking the parameters of `Op::ConfigureSession` and
turning it into a `Session`.
This approach has two challenges from a state management perspective:
https://github.com/openai/codex/blob/f968a1327ad39a7786759ea8f1d1c088fe41e91b/codex-rs/core/src/codex.rs#L718
- The local `sess` variable in `submission_loop()` has to be `mut` and
`Option<Arc<Session>>` because there is no invariant that a `Session` is
present for the lifetime of the loop, so there is a lot of logic to deal
with the case where `sess` is `None` (e.g., the `send_no_session_event`
function and all of its callsites).
- `submission_loop()` is written in such a way that
`Op::ConfigureSession` could be observed multiple times, but in
practice, it is only observed exactly once at the start of the loop.
In this PR, we try to simplify the state management by _removing_ the
`Op::ConfigureSession` enum variant and constructing the `Session` as
part of `Codex::spawn()` so that it can be passed to `submission_loop()`
as `Arc<Session>`. The original logic from the `Op::ConfigureSession`
has largely been moved to the new `Session::new()` constructor.
---
Incidentally, I also noticed that the handling of `Op::ConfigureSession`
can result in events being dispatched in addition to
`EventMsg::SessionConfigured`, as an `EventMsg::Error` is created for
every MCP initialization error, so it is important to preserve that
behavior:
https://github.com/openai/codex/blob/f968a1327ad39a7786759ea8f1d1c088fe41e91b/codex-rs/core/src/codex.rs#L901-L916
Though admittedly, I believe this does not play nice with #2264, as
these error messages will likely be dispatched before the client has a
chance to call `addConversationListener`, so we likely need to make it
so `newConversation` automatically creates the subscription, but we must
also guarantee that the "ack" from `newConversation` is returned before
any other conversation-related notifications are sent so the client
knows what `conversation_id` to match on.
2025-08-14 09:55:28 -07:00
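Reduced to its essentials, the shape after this PR looks roughly like the following inside `Codex::spawn()` (a sketch based on the code later in this file, not a verbatim excerpt):
```rust
// Session is constructed eagerly, so submission_loop() always receives a
// live Arc<Session> and Op::ConfigureSession disappears from the protocol.
let (session, turn_context) =
    Session::new(configure_session, config.clone(), auth_manager, tx_event, initial_history)
        .await?;
tokio::spawn(submission_loop(session, turn_context, config, rx_sub));
```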
/// Configure the model session.
struct ConfigureSession {
    /// Provider identifier ("openai", "openrouter", ...).
    provider: ModelProviderInfo,
    /// If not specified, server will use its default model.
    model: String,
2025-09-12 12:06:33 -07:00
    model_reasoning_effort: Option<ReasoningEffortConfig>,
2025-08-14 09:55:28 -07:00
    model_reasoning_summary: ReasoningSummaryConfig,
    /// Model instructions that are appended to the base instructions.
    user_instructions: Option<String>,
    /// Base instructions override.
    base_instructions: Option<String>,
    /// When to escalate for approval for execution.
    approval_policy: AskForApproval,
    /// How to sandbox commands executed in the system.
    sandbox_policy: SandboxPolicy,
2025-09-23 07:25:46 -07:00
    notify: UserNotifier,
2025-08-14 09:55:28 -07:00
    /// Working directory that should be treated as the *root* of the
    /// session. All relative paths supplied by the model as well as the
    /// execution sandbox are resolved against this directory **instead**
    /// of the process-wide current working directory. CLI front-ends are
    /// expected to expand this to an absolute path before sending the
    /// `ConfigureSession` operation so that the business-logic layer can
    /// operate deterministically.
    cwd: PathBuf,
}
2025-05-04 10:57:12 -07:00
impl Session {
2025-08-14 09:55:28 -07:00
async fn new(
    configure_session: ConfigureSession,
    config: Arc<Config>,
2025-08-22 13:10:11 -07:00
    auth_manager: Arc<AuthManager>,
2025-08-14 09:55:28 -07:00
    tx_event: Sender<Event>,
2025-09-03 21:47:00 -07:00
    initial_history: InitialHistory,
2025-08-15 09:40:02 -07:00
) -> anyhow::Result<(Arc<Self>, TurnContext)> {
2025-08-14 09:55:28 -07:00
let ConfigureSession {
    provider,
    model,
    model_reasoning_effort,
    model_reasoning_summary,
    user_instructions,
    base_instructions,
    approval_policy,
    sandbox_policy,
    notify,
    cwd,
} = configure_session;
debug!("Configuring session: model={model}; provider={provider:?}");
if !cwd.is_absolute() {
    return Err(anyhow::anyhow!("cwd is not absolute: {cwd:?}"));
}
2025-09-08 14:54:47 -07:00
let (conversation_id, rollout_params) = match &initial_history {
    InitialHistory::New | InitialHistory::Forked(_) => {
        let conversation_id = ConversationId::default();
        (
            conversation_id,
            RolloutRecorderParams::new(conversation_id, user_instructions.clone()),
        )
    }
    InitialHistory::Resumed(resumed_history) => (
        resumed_history.conversation_id,
        RolloutRecorderParams::resume(resumed_history.rollout_path.clone()),
    ),
};
2025-08-14 09:55:28 -07:00
// Error messages to dispatch after SessionConfigured is sent.
let mut post_session_configured_error_events = Vec::<Event>::new();
2025-08-14 13:29:58 -07:00
// Kick off independent async setup tasks in parallel to reduce startup latency.
//
// - initialize RolloutRecorder with new or resumed session info
// - spin up MCP connection manager
// - perform default shell discovery
// - load history metadata
2025-09-08 14:54:47 -07:00
let rollout_fut = RolloutRecorder::new(&config, rollout_params);
2025-08-14 13:29:58 -07:00
2025-09-26 10:13:37 -07:00
let mcp_fut = McpConnectionManager::new(
    config.mcp_servers.clone(),
    config.use_experimental_use_rmcp_client,
);
2025-09-10 12:40:24 -07:00
let default_shell_fut = shell::default_user_shell();
2025-08-14 13:29:58 -07:00
let history_meta_fut = crate::message_history::history_metadata(&config);
// Join all independent futures.
2025-09-02 15:44:29 -07:00
let (rollout_recorder, mcp_res, default_shell, (history_log_id, history_entry_count)) =
2025-08-14 13:29:58 -07:00
    tokio::join!(rollout_fut, mcp_fut, default_shell_fut, history_meta_fut);
2025-09-02 15:44:29 -07:00
let rollout_recorder = rollout_recorder.map_err(|e| {
    error!("failed to initialize rollout recorder: {e:#}");
    anyhow::anyhow!("failed to initialize rollout recorder: {e:#}")
})?;
2025-09-09 00:11:48 -07:00
let rollout_path = rollout_recorder.rollout_path.clone();
2025-08-14 09:55:28 -07:00
// Create the mutable state for the Session.
2025-09-25 11:16:06 +01:00
let state = SessionState::new();
2025-08-14 13:29:58 -07:00
// Handle MCP manager result and record any startup failures.
let (mcp_connection_manager, failed_clients) = match mcp_res {
    Ok((mgr, failures)) => (mgr, failures),
    Err(e) => {
        let message = format!("Failed to create MCP connection manager: {e:#}");
        error!("{message}");
        post_session_configured_error_events.push(Event {
            id: INITIAL_SUBMIT_ID.to_owned(),
            msg: EventMsg::Error(ErrorEvent { message }),
        });
        (McpConnectionManager::default(), Default::default())
    }
};
2025-08-14 09:55:28 -07:00
// Surface individual client start-up failures to the user.
if !failed_clients.is_empty() {
    for (server_name, err) in failed_clients {
        let message = format!("MCP client for `{server_name}` failed to start: {err:#}");
        error!("{message}");
        post_session_configured_error_events.push(Event {
            id: INITIAL_SUBMIT_ID.to_owned(),
            msg: EventMsg::Error(ErrorEvent { message }),
        });
    }
}
2025-09-29 19:30:55 +01:00
let otel_event_manager = OtelEventManager::new(
    conversation_id,
    config.model.as_str(),
    config.model_family.slug.as_str(),
    auth_manager.auth().and_then(|a| a.get_account_id()),
    auth_manager.auth().map(|a| a.mode),
    config.otel.log_user_prompt,
    terminal::user_agent(),
);
otel_event_manager.conversation_starts(
    config.model_provider.name.as_str(),
    config.model_reasoning_effort,
    config.model_reasoning_summary,
    config.model_context_window,
    config.model_max_output_tokens,
    config.model_auto_compact_token_limit,
    config.approval_policy,
    config.sandbox_policy.clone(),
    config.mcp_servers.keys().map(String::as_str).collect(),
    config.active_profile.clone(),
);
2025-09-07 20:22:25 -07:00
// Now that the conversation id is final (may have been updated by resume),
2025-08-14 13:29:58 -07:00
// construct the model client.
let client = ModelClient::new(
    config.clone(),
2025-08-22 13:10:11 -07:00
    Some(auth_manager.clone()),
2025-09-29 19:30:55 +01:00
    otel_event_manager,
2025-08-14 13:29:58 -07:00
    provider.clone(),
    model_reasoning_effort,
    model_reasoning_summary,
2025-09-07 20:22:25 -07:00
    conversation_id,
2025-08-14 13:29:58 -07:00
);
2025-08-15 09:40:02 -07:00
let turn_context = TurnContext {
        client,
        tools_config: ToolsConfig::new(&ToolsConfigParams {
            model_family: &config.model_family,
            include_plan_tool: config.include_plan_tool,
            include_apply_patch_tool: config.include_apply_patch_tool,
            include_web_search_request: config.tools_web_search_request,
            use_streamable_shell_tool: config.use_experimental_streamable_shell_tool,
            include_view_image_tool: config.include_view_image_tool,
Unified execution (#3288)
## Unified PTY-Based Exec Tool
Note: this requires the following flag in the config:
`use_experimental_unified_exec_tool=true`
- Adds a PTY-backed interactive exec feature (“unified_exec”) with session reuse via session_id, bounded output (128 KiB), and timeout clamping (≤ 60 s).
- Protocol: introduces ResponseItem::UnifiedExec { session_id, arguments, timeout_ms }.
- Tools: exposes unified_exec as a function tool (Responses API); excluded from the Chat Completions payload while still supported in tool lists.
- Path handling: resolves commands via PATH (or explicit paths), with UTF-8/newline-aware truncation (truncate_middle).
- Tests: cover command parsing, path resolution, session persistence/cleanup, multi-session isolation, timeouts, and truncation behavior.
            experimental_unified_exec_tool: config.use_experimental_unified_exec_tool,
        }),
        user_instructions,
        base_instructions,
        approval_policy,
        sandbox_policy,
        shell_environment_policy: config.shell_environment_policy.clone(),
        cwd,
Review Mode (Core) (#3401)
## 📝 Review Mode -- Core
This PR introduces the Core implementation for Review mode:
- New op `Op::Review { prompt: String }`: spawns a child review task with isolated context, a review-specific system prompt, and a `Config.review_model`.
- `EnteredReviewMode`: emitted when the child review session starts. Every event from this point onwards reflects the review session.
- `ExitedReviewMode(Option<ReviewOutputEvent>)`: emitted when the review finishes or is interrupted, with optional structured findings:
```json
{
  "findings": [
    {
      "title": "<≤ 80 chars, imperative>",
      "body": "<valid Markdown explaining *why* this is a problem; cite files/lines/functions>",
      "confidence_score": <float 0.0-1.0>,
      "priority": <int 0-3>,
      "code_location": {
        "absolute_file_path": "<file path>",
        "line_range": {"start": <int>, "end": <int>}
      }
    }
  ],
  "overall_correctness": "patch is correct" | "patch is incorrect",
  "overall_explanation": "<1-3 sentence explanation justifying the overall_correctness verdict>",
  "overall_confidence_score": <float 0.0-1.0>
}
```
## Questions
### Why separate out its own message history?
We want the review thread to match the training of our review models as much as possible -- that means using a custom prompt, removing user instructions, and starting a clean chat history.
We also want to make sure the review thread doesn't leak into the parent thread.
### Why do this as a mode, vs. sub-agents?
1. We want review to be a synchronous task, so it's fine for now to do a bespoke implementation.
2. We're still unclear about the final structure for sub-agents. We'd prefer to land this quickly and then refactor into sub-agents without rushing that implementation.
        is_review_mode: false,
        final_output_json_schema: None,
    };
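The structured review findings from #3401 above map naturally onto Rust types. Below is a minimal sketch of a possible deserialization target; field names follow the JSON schema above, but this assumes a serde dependency, and the actual definitions live in the protocol crate and may differ.

```rust
use serde::Deserialize; // assumes serde with the "derive" feature

// Sketch of deserialization targets for the review JSON above; the real
// `ReviewOutputEvent` may differ in detail.
#[derive(Deserialize)]
struct LineRange {
    start: u32,
    end: u32,
}

#[derive(Deserialize)]
struct CodeLocation {
    absolute_file_path: String,
    line_range: LineRange,
}

#[derive(Deserialize)]
struct ReviewFinding {
    title: String,
    body: String,
    confidence_score: f32,
    priority: u8,
    code_location: CodeLocation,
}

#[derive(Deserialize)]
struct ReviewOutputEvent {
    findings: Vec<ReviewFinding>,
    overall_correctness: String,
    overall_explanation: String,
    overall_confidence_score: f32,
}
```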
ref: full state refactor (#4174)
## Current State Observations
- `Session` currently holds many unrelated responsibilities (history,
approval queues, task handles, rollout recorder, shell discovery, token
tracking, etc.), making it hard to reason about ownership and lifetimes.
- The anonymous `State` struct inside `codex.rs` mixes session-long data
with turn-scoped queues and approval bookkeeping.
- Turn execution (`run_task`) relies on ad-hoc local variables that
should conceptually belong to a per-turn state object.
- External modules (`codex::compact`, tests) frequently poke the raw
`Session.state` mutex, which couples them to implementation details.
- Interrupts, approvals, and rollout persistence all have bespoke
cleanup paths, contributing to subtle bugs when a turn is aborted
mid-flight.
## Desired End State
- Keep a slim `Session` object that acts as the orchestrator and façade.
It should expose a focused API (submit, approvals, interrupts, event
emission) without storing unrelated fields directly.
- Introduce a `state` module that encapsulates all mutable data
structures:
- `SessionState`: session-persistent data (history, approved commands,
token/rate-limit info, maybe user preferences).
- `ActiveTurn`: metadata for the currently running turn (sub-id, task
kind, abort handle) and an `Arc<TurnState>`.
- `TurnState`: all turn-scoped pieces (pending inputs, approval waiters,
diff tracker, review history, auto-compact flags, last agent message,
outstanding tool call bookkeeping).
- Group long-lived helpers/managers into a dedicated `SessionServices`
struct so `Session` does not accumulate "random" fields.
- Provide clear, lock-safe APIs so other modules never touch raw
mutexes.
- Ensure every turn creates/drops a `TurnState` and that
interrupts/finishes delegate cleanup to it.
    let services = SessionServices {
        mcp_connection_manager,
        session_manager: ExecSessionManager::default(),
        unified_exec_manager: UnifiedExecSessionManager::default(),
        notifier: notify,
        rollout: Mutex::new(Some(rollout_recorder)),
        codex_linux_sandbox_exe: config.codex_linux_sandbox_exe.clone(),
        user_shell: default_shell,
        show_raw_agent_reasoning: config.show_raw_agent_reasoning,
    };
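The `unified_exec_manager` above backs the PTY-based tool from #3288, which clamps timeouts to 60 s and bounds captured output to 128 KiB. Here is a self-contained sketch of that clamp-and-truncate behavior; the constants and the middle-truncation helper are modeled on the PR description, not taken from the real implementation.

```rust
use std::time::Duration;

// Limits taken from the #3288 description; the names here are assumptions.
const MAX_TIMEOUT: Duration = Duration::from_secs(60);
const MAX_OUTPUT_BYTES: usize = 128 * 1024;

/// Clamp a caller-supplied timeout (milliseconds) to the 60 s ceiling.
fn clamp_timeout(requested_ms: Option<u64>) -> Duration {
    requested_ms
        .map(Duration::from_millis)
        .map_or(MAX_TIMEOUT, |d| d.min(MAX_TIMEOUT))
}

/// Keep the head and tail of oversized output and mark the elision in the
/// middle: a simplified, char-boundary-safe stand-in for `truncate_middle`.
fn truncate_middle(s: &str, max_bytes: usize) -> String {
    if s.len() <= max_bytes {
        return s.to_string();
    }
    let keep = max_bytes / 2;
    let head_end = (0..=keep).rev().find(|&i| s.is_char_boundary(i)).unwrap_or(0);
    let tail_start = (s.len() - keep..s.len())
        .find(|&i| s.is_char_boundary(i))
        .unwrap_or(s.len());
    format!("{}…[truncated]…{}", &s[..head_end], &s[tail_start..])
}

fn main() {
    assert_eq!(clamp_timeout(Some(120_000)), MAX_TIMEOUT); // 120 s requested, 60 s granted
    assert_eq!(clamp_timeout(None), MAX_TIMEOUT);
    let big = "x".repeat(MAX_OUTPUT_BYTES * 2);
    assert!(truncate_middle(&big, MAX_OUTPUT_BYTES).len() < big.len());
}
```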
    let sess = Arc::new(Session {
        conversation_id,
        tx_event: tx_event.clone(),
        state: Mutex::new(state),
        active_turn: Mutex::new(None),
        services,
        next_internal_sub_id: AtomicU64::new(0),
    });

    // Dispatch the SessionConfiguredEvent first and then report any errors.
    // If resuming, include converted initial messages in the payload so UIs can render them immediately.
    let initial_messages = initial_history.get_event_msgs();
    sess.record_initial_history(&turn_context, initial_history)
        .await;
    let events = std::iter::once(Event {
        id: INITIAL_SUBMIT_ID.to_owned(),
        msg: EventMsg::SessionConfigured(SessionConfiguredEvent {
            session_id: conversation_id,
            model,
            reasoning_effort: model_reasoning_effort,
            history_log_id,
            history_entry_count,
            initial_messages,
            rollout_path,
        }),
    })
    .chain(post_session_configured_error_events.into_iter());
    for event in events {
        sess.send_event(event).await;
    }

    Ok((sess, turn_context))
}
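The shape described in #2291 reduces, in miniature, to the following: construct the `Session` inside `Codex::spawn()` and hand the loop an `Arc<Session>` it can rely on. This sketch uses stand-in types rather than the real `codex-core` signatures.

```rust
use std::sync::Arc;
use tokio::sync::mpsc;

// Stand-in types; the real Session and Submission are far richer.
struct Session {}
struct Submission(String);

async fn submission_loop(sess: Arc<Session>, mut rx: mpsc::Receiver<Submission>) {
    // `sess` is guaranteed to exist for the whole loop: no Option<Arc<Session>>
    // and no send_no_session_event fallback paths.
    while let Some(Submission(op)) = rx.recv().await {
        let _ = (&sess, op); // handle the op against the session
    }
}

fn spawn_codex() -> mpsc::Sender<Submission> {
    let (tx, rx) = mpsc::channel(16);
    // The session is fully constructed before the loop ever runs, so
    // Op::ConfigureSession no longer needs to exist as a submission.
    let sess = Arc::new(Session {});
    tokio::spawn(submission_loop(sess, rx));
    tx
}

#[tokio::main]
async fn main() {
    let tx = spawn_codex();
    tx.send(Submission("hello".to_string())).await.unwrap();
}
```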
fn next_internal_sub_id(&self) -> String {
    let id = self
        .next_internal_sub_id
        .fetch_add(1, std::sync::atomic::Ordering::SeqCst);
    format!("auto-compact-{id}")
}
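A note on the pattern above: `fetch_add` on an `AtomicU64` hands out unique, monotonically increasing IDs without a lock, so concurrent callers can never mint the same `auto-compact-N` twice. In isolation:

```rust
use std::sync::atomic::{AtomicU64, Ordering};

fn main() {
    let counter = AtomicU64::new(0);
    // fetch_add returns the previous value, so the first ID is auto-compact-0.
    let a = format!("auto-compact-{}", counter.fetch_add(1, Ordering::SeqCst));
    let b = format!("auto-compact-{}", counter.fetch_add(1, Ordering::SeqCst));
    assert_eq!((a.as_str(), b.as_str()), ("auto-compact-0", "auto-compact-1"));
}
```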
async fn record_initial_history(
    &self,
    turn_context: &TurnContext,
    conversation_history: InitialHistory,
) {
    match conversation_history {
        InitialHistory::New => {
            // Build and record initial items (user instructions + environment context)
            let items = self.build_initial_context(turn_context);
            self.record_conversation_items(&items).await;
        }
        InitialHistory::Resumed(_) | InitialHistory::Forked(_) => {
            let rollout_items = conversation_history.get_rollout_items();
            let persist = matches!(conversation_history, InitialHistory::Forked(_));
            // Always add response items to conversation history
            let reconstructed_history =
                self.reconstruct_history_from_rollout(turn_context, &rollout_items);
            if !reconstructed_history.is_empty() {
                self.record_into_history(&reconstructed_history).await;
            }
            // If persisting, persist all rollout items as-is (recorder filters)
            if persist && !rollout_items.is_empty() {
                self.persist_rollout_items(&rollout_items).await;
            }
        }
    }
}
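For orientation, the variants this function matches on imply roughly the following shape for `InitialHistory`. This is a reconstruction from usage, not the real definition; the payload type and the comment about resume vs. fork are assumptions.

```rust
// Reconstructed from the usage above; the real `InitialHistory` lives in
// codex-core and the payload type here is a hypothetical stand-in.
#[allow(dead_code)]
struct RolloutItem;

#[allow(dead_code)]
enum InitialHistory {
    New,                       // fresh conversation: seed instructions + env context
    Resumed(Vec<RolloutItem>), // continuing an existing rollout file
    Forked(Vec<RolloutItem>),  // branched into a new rollout file
}

fn main() {
    let history = InitialHistory::Forked(vec![RolloutItem]);
    // Only a fork re-persists its seed items; the assumption is that a resumed
    // session's rollout file already contains them, while a fork starts fresh.
    let persist = matches!(history, InitialHistory::Forked(_));
    assert!(persist);
}
```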
/// Persist the event to rollout and send it to clients.
pub(crate) async fn send_event(&self, event: Event) {
    // Persist the event into rollout (recorder filters as needed) before
    // forwarding it to clients.
    let rollout_items = vec![RolloutItem::EventMsg(event.msg.clone())];
    self.persist_rollout_items(&rollout_items).await;
    if let Err(e) = self.tx_event.send(event).await {
        error!("failed to send event: {e}");
    }
}
pub async fn request_command_approval(
    &self,
    sub_id: String,
    call_id: String,
    command: Vec<String>,
    cwd: PathBuf,
    reason: Option<String>,
) -> ReviewDecision {
fix: add callback to map before sending request to fix race condition (#3146)
Last week, I thought I found the smoking gun in our flaky integration
tests where holding these locks could have led to potential deadlock:
- https://github.com/openai/codex/pull/2876
- https://github.com/openai/codex/pull/2878
Yet even after those PRs went in, we continued to see flakiness in our
integration tests! Though with the additional logging added as part of
debugging those tests, I now saw things like:
```
read message from stdout: Notification(JSONRPCNotification { jsonrpc: "2.0", method: "codex/event/exec_approval_request", params: Some(Object {"id": String("0"), "msg": Object {"type": String("exec_approval_request"), "call_id": String("call1"), "command": Array [String("python3"), String("-c"), String("print(42)")], "cwd": String("/tmp/.tmpFj2zwi/workdir")}, "conversationId": String("c67b32c5-9475-41bf-8680-f4b4834ebcc6")}) })
notification: Notification(JSONRPCNotification { jsonrpc: "2.0", method: "codex/event/exec_approval_request", params: Some(Object {"id": String("0"), "msg": Object {"type": String("exec_approval_request"), "call_id": String("call1"), "command": Array [String("python3"), String("-c"), String("print(42)")], "cwd": String("/tmp/.tmpFj2zwi/workdir")}, "conversationId": String("c67b32c5-9475-41bf-8680-f4b4834ebcc6")}) })
read message from stdout: Request(JSONRPCRequest { id: Integer(0), jsonrpc: "2.0", method: "execCommandApproval", params: Some(Object {"conversation_id": String("c67b32c5-9475-41bf-8680-f4b4834ebcc6"), "call_id": String("call1"), "command": Array [String("python3"), String("-c"), String("print(42)")], "cwd": String("/tmp/.tmpFj2zwi/workdir")}) })
writing message to stdin: Response(JSONRPCResponse { id: Integer(0), jsonrpc: "2.0", result: Object {"decision": String("approved")} })
in read_stream_until_notification_message(codex/event/task_complete)
[mcp stderr] 2025-09-04T00:00:59.738585Z INFO codex_mcp_server::message_processor: <- response: JSONRPCResponse { id: Integer(0), jsonrpc: "2.0", result: Object {"decision": String("approved")} }
[mcp stderr] 2025-09-04T00:00:59.738740Z DEBUG codex_core::codex: Submission sub=Submission { id: "1", op: ExecApproval { id: "0", decision: Approved } }
[mcp stderr] 2025-09-04T00:00:59.738832Z WARN codex_core::codex: No pending approval found for sub_id: 0
```
That is, a response was sent for a request, but no callback was in place
to handle the response!
This time, I think I may have found the underlying issue (though the fixes for holding locks for too long may have also been part of it): I found cases where we were sending the request:
https://github.com/openai/codex/blob/234c0a0469db222f05df08d00ae5032312f77427/codex-rs/core/src/codex.rs#L597
before inserting the `Sender` into the `pending_approvals` map (which
has to wait on acquiring a mutex):
https://github.com/openai/codex/blob/234c0a0469db222f05df08d00ae5032312f77427/codex-rs/core/src/codex.rs#L598-L601
so it is possible the request could go out and the client could respond
before `pending_approvals` was updated!
Note this was happening in both `request_command_approval()` and
`request_patch_approval()`, which maps to the sorts of errors we have
been seeing when these integration tests have been flaking on us.
While here, I am also adding some extra logging that prints if inserting
into `pending_approvals` overwrites an entry as opposed to purely
inserting one. Today, a conversation can have only one pending request
at a time, but as we are planning to support parallel tool calls, this
invariant may not continue to hold, in which case we need to revisit
this abstraction.
    // Add the tx_approve callback to the map before sending the request.
    let (tx_approve, rx_approve) = oneshot::channel();
    let event_id = sub_id.clone();
    let prev_entry = {
        let mut active = self.active_turn.lock().await;
        match active.as_mut() {
            Some(at) => {
                let mut ts = at.turn_state.lock().await;
                ts.insert_pending_approval(sub_id, tx_approve)
            }
            None => None,
        }
    };
    if prev_entry.is_some() {
        warn!("Overwriting existing pending approval for sub_id: {event_id}");
    }
    let event = Event {
        id: event_id,
        msg: EventMsg::ExecApprovalRequest(ExecApprovalRequestEvent {
            call_id,
                command,
                cwd,
                reason,
            }),
        };
        self.send_event(event).await;
        rx_approve.await.unwrap_or_default()
    }
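The fix in #3146 boils down to an ordering rule for request/response over a channel map: register the response waiter before the request can leave the process. A minimal sketch of that pattern, using tokio primitives with hypothetical names (this is not the actual codex-rs API):

```rust
use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::{Mutex, oneshot};

type Pending = Arc<Mutex<HashMap<String, oneshot::Sender<bool>>>>;

async fn request_approval(pending: Pending, sub_id: String) -> oneshot::Receiver<bool> {
    let (tx, rx) = oneshot::channel();
    // 1) Register the callback first, so a fast client cannot respond
    //    before the map knows about the request.
    pending.lock().await.insert(sub_id.clone(), tx);
    // 2) Only then emit the request to the client.
    send_request(&sub_id).await;
    rx
}

// Stand-in for the real transport; see the send_event() calls in this file
// for the actual send.
async fn send_request(_sub_id: &str) {}
```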
    pub async fn request_patch_approval(
        &self,
        sub_id: String,
        call_id: String,
fix: ensure apply_patch resolves relative paths against workdir or project cwd (#810)
https://github.com/openai/codex/pull/800 kicked off some work to be more
disciplined about honoring the `cwd` param passed in rather than
assuming `std::env::current_dir()` as the `cwd`. As part of this, we
need to ensure `apply_patch` calls honor the appropriate `cwd` as well,
which is significant if the paths in the `apply_patch` arg are not
absolute paths themselves. Failing that:
- The `apply_patch` function call can contain an optional `workdir`
param, so:
- If specified and is an absolute path, it should be used to resolve
relative paths
- If specified and is a relative path, it should first be resolved
against `Config.cwd`, and relative paths then resolved against the
result
- If `workdir` is not specified on the function call, relative paths
should be resolved against `Config.cwd`
Note that we had a similar issue in the TypeScript CLI that was fixed in
https://github.com/openai/codex/pull/556.
As part of the fix, this PR introduces `ApplyPatchAction` so clients can
deal with that instead of the raw `HashMap<PathBuf,
ApplyPatchFileChange>`. This enables us to enforce, by construction,
that all paths contained in the `ApplyPatchAction` are absolute paths.
2025-05-04 12:32:51 -07:00
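A minimal sketch of the resolution rules above, as a standalone helper (the name is hypothetical; the real implementation lives behind `ApplyPatchAction`):

```rust
use std::path::{Path, PathBuf};

/// Resolve a possibly-relative path from an apply_patch call.
fn resolve_patch_path(path: &Path, workdir: Option<&Path>, config_cwd: &Path) -> PathBuf {
    if path.is_absolute() {
        return path.to_path_buf();
    }
    match workdir {
        // Absolute workdir: resolve relative paths directly against it.
        Some(wd) if wd.is_absolute() => wd.join(path),
        // Relative workdir: resolve it against Config.cwd first, then
        // resolve relative paths against the result.
        Some(wd) => config_cwd.join(wd).join(path),
        // No workdir: fall back to Config.cwd.
        None => config_cwd.join(path),
    }
}
```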
        action: &ApplyPatchAction,
        reason: Option<String>,
        grant_root: Option<PathBuf>,
    ) -> oneshot::Receiver<ReviewDecision> {
        // Add the tx_approve callback to the map before sending the request.
        let (tx_approve, rx_approve) = oneshot::channel();
        let event_id = sub_id.clone();
        let prev_entry = {
ref: full state refactor (#4174)
## Current State Observations
- `Session` currently holds many unrelated responsibilities (history,
approval queues, task handles, rollout recorder, shell discovery, token
tracking, etc.), making it hard to reason about ownership and lifetimes.
- The anonymous `State` struct inside `codex.rs` mixes session-long data
with turn-scoped queues and approval bookkeeping.
- Turn execution (`run_task`) relies on ad-hoc local variables that
should conceptually belong to a per-turn state object.
- External modules (`codex::compact`, tests) frequently poke the raw
`Session.state` mutex, which couples them to implementation details.
- Interrupts, approvals, and rollout persistence all have bespoke
cleanup paths, contributing to subtle bugs when a turn is aborted
mid-flight.
## Desired End State
- Keep a slim `Session` object that acts as the orchestrator and façade.
It should expose a focused API (submit, approvals, interrupts, event
emission) without storing unrelated fields directly.
- Introduce a `state` module that encapsulates all mutable data
structures:
- `SessionState`: session-persistent data (history, approved commands,
token/rate-limit info, maybe user preferences).
- `ActiveTurn`: metadata for the currently running turn (sub-id, task
kind, abort handle) and an `Arc<TurnState>`.
- `TurnState`: all turn-scoped pieces (pending inputs, approval waiters,
diff tracker, review history, auto-compact flags, last agent message,
outstanding tool call bookkeeping).
- Group long-lived helpers/managers into a dedicated `SessionServices`
struct so `Session` does not accumulate "random" fields.
- Provide clear, lock-safe APIs so other modules never touch raw
mutexes.
- Ensure every turn creates/drops a `TurnState` and that
interrupts/finishes delegate cleanup to it.
2025-09-25 11:16:06 +01:00
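For orientation, the shape the refactor describes can be sketched as follows (field names are illustrative, not the real codex-rs definitions):

```rust
use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::{Mutex, oneshot};

/// Session-persistent data (history, approved commands, token info, ...).
struct SessionState {
    history: Vec<String>,
    approved_commands: Vec<Vec<String>>,
}

/// Turn-scoped bookkeeping (pending inputs, approval waiters, ...).
struct TurnState {
    pending_approvals: HashMap<String, oneshot::Sender<bool>>,
}

/// Metadata for the currently running turn.
struct ActiveTurn {
    sub_id: String,
    turn_state: Arc<Mutex<TurnState>>,
}

/// Slim façade: session-long state plus the optional active turn.
struct Session {
    state: Mutex<SessionState>,
    active_turn: Mutex<Option<ActiveTurn>>,
}
```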
            let mut active = self.active_turn.lock().await;
            match active.as_mut() {
                Some(at) => {
                    let mut ts = at.turn_state.lock().await;
                    ts.insert_pending_approval(sub_id, tx_approve)
                }
                None => None,
            }
        };
        if prev_entry.is_some() {
            warn!("Overwriting existing pending approval for sub_id: {event_id}");
        }
        let event = Event {
            id: event_id,
            msg: EventMsg::ApplyPatchApprovalRequest(ApplyPatchApprovalRequestEvent {
                call_id,
                changes: convert_apply_patch_to_protocol(action),
                reason,
                grant_root,
            }),
        };
        self.send_event(event).await;
        rx_approve
    }

    pub async fn notify_approval(&self, sub_id: &str, decision: ReviewDecision) {
        let entry = {
            let mut active = self.active_turn.lock().await;
            match active.as_mut() {
                Some(at) => {
                    let mut ts = at.turn_state.lock().await;
                    ts.remove_pending_approval(sub_id)
                }
                None => None,
            }
        };
        match entry {
            Some(tx_approve) => {
                tx_approve.send(decision).ok();
            }
            None => {
                warn!("No pending approval found for sub_id: {sub_id}");
            }
        }
    }

    pub async fn add_approved_command(&self, cmd: Vec<String>) {
        let mut state = self.state.lock().await;
        state.add_approved_command(cmd);
    }

fix: chat completions API now also passes tools along (#1167)
Prior to this PR, there were two big misses in `chat_completions.rs`:
1. The loop in `stream_chat_completions()` was only including items of
type `ResponseItem::Message` when building up the `"messages"` JSON for
the `POST` request to the `chat/completions` endpoint. This fixes things
by ensuring other variants (`FunctionCall`, `LocalShellCall`, and
`FunctionCallOutput`) are included, as well.
2. In `process_chat_sse()`, we were not recording tool calls and were
only emitting items of type
`ResponseEvent::OutputItemDone(ResponseItem::Message)` to the stream.
Now we introduce `FunctionCallState`, which is used to accumulate the
`delta`s of type `tool_calls`, so we can ultimately emit a
`ResponseItem::FunctionCall`, when appropriate.
While function calling now appears to work for chat completions with my
local testing, I believe that there are still edge cases that are not
covered and that this codepath would benefit from a battery of
integration tests. (As part of that further cleanup, we should also work
to support streaming responses in the UI.)
The other important part of this PR is some cleanup in
`core/src/codex.rs`. In particular, it was hard to reason about how
`run_task()` was building up the list of messages to include in a
request across the various cases:
- Responses API
- Chat Completions API
- Responses API used in concert with ZDR
I like to think things are a bit cleaner now where:
- `zdr_transcript` (if present) contains all messages in the history of
the conversation, which includes function call outputs that have not
been sent back to the model yet
- `pending_input` includes any messages the user has submitted while the
turn is in flight that need to be injected as part of the next `POST` to
the model
- `input_for_next_turn` includes the tool call outputs that have not
been sent back to the model yet
2025-06-02 13:47:51 -07:00
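The `FunctionCallState` accumulator mentioned above can be pictured roughly like this (a sketch with assumed field names, not the actual chat_completions.rs types):

```rust
/// Accumulates streamed `tool_calls` deltas until a complete
/// function call can be emitted.
#[derive(Default)]
struct FunctionCallState {
    name: Option<String>,
    call_id: Option<String>,
    arguments: String, // JSON argument fragments, concatenated in order
}

impl FunctionCallState {
    fn apply_delta(&mut self, name: Option<&str>, call_id: Option<&str>, args: &str) {
        // Name and id arrive once; argument fragments arrive repeatedly.
        if let Some(n) = name {
            self.name.get_or_insert_with(|| n.to_string());
        }
        if let Some(id) = call_id {
            self.call_id.get_or_insert_with(|| id.to_string());
        }
        self.arguments.push_str(args);
    }
}
```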
    /// Records input items: always append to conversation history and
    /// persist these response items to rollout.
    async fn record_conversation_items(&self, items: &[ResponseItem]) {
        self.record_into_history(items).await;
        self.persist_rollout_response_items(items).await;
    }

    fn reconstruct_history_from_rollout(
        &self,
        turn_context: &TurnContext,
        rollout_items: &[RolloutItem],
    ) -> Vec<ResponseItem> {
        let mut history = ConversationHistory::new();
        for item in rollout_items {
            match item {
                RolloutItem::ResponseItem(response_item) => {
                    history.record_items(std::iter::once(response_item));
                }
                RolloutItem::Compacted(compacted) => {
                    let snapshot = history.contents();
                    let user_messages = collect_user_messages(&snapshot);
                    let rebuilt = build_compacted_history(
                        self.build_initial_context(turn_context),
                        &user_messages,
                        &compacted.message,
                    );
                    history.replace(rebuilt);
                }
                _ => {}
            }
        }
        history.contents()
    }

    /// Append ResponseItems to the in-memory conversation history only.
    async fn record_into_history(&self, items: &[ResponseItem]) {
        let mut state = self.state.lock().await;
        state.record_items(items.iter());
    }

    async fn replace_history(&self, items: Vec<ResponseItem>) {
        let mut state = self.state.lock().await;
        state.replace_history(items);
    }

    async fn persist_rollout_response_items(&self, items: &[ResponseItem]) {
        let rollout_items: Vec<RolloutItem> = items
            .iter()
            .cloned()
            .map(RolloutItem::ResponseItem)
            .collect();
        self.persist_rollout_items(&rollout_items).await;
    }

    pub(crate) fn build_initial_context(&self, turn_context: &TurnContext) -> Vec<ResponseItem> {
        let mut items = Vec::<ResponseItem>::with_capacity(2);
        if let Some(user_instructions) = turn_context.user_instructions.as_deref() {
            items.push(UserInstructions::new(user_instructions.to_string()).into());
        }
        items.push(ResponseItem::from(EnvironmentContext::new(
            Some(turn_context.cwd.clone()),
            Some(turn_context.approval_policy),
            Some(turn_context.sandbox_policy.clone()),
            Some(self.user_shell().clone()),
        )));
        items
    }

    async fn persist_rollout_items(&self, items: &[RolloutItem]) {
        let recorder = {
            let guard = self.services.rollout.lock().await;
            guard.clone()
        };
        if let Some(rec) = recorder
            && let Err(e) = rec.record_items(items).await
        {
            error!("failed to record rollout items: {e:#}");
        }
    }

    pub(crate) async fn history_snapshot(&self) -> Vec<ResponseItem> {
        let state = self.state.lock().await;
        state.history_snapshot()
    }

    async fn update_token_usage_info(
        &self,
        sub_id: &str,
        turn_context: &TurnContext,
        token_usage: Option<&TokenUsage>,
    ) {
        {
            let mut state = self.state.lock().await;
            if let Some(token_usage) = token_usage {
            state.update_token_info_from_usage(
                token_usage,
                turn_context.client.get_model_context_window(),
            );
        }
    }
    self.send_token_count_event(sub_id).await;
}
async fn update_rate_limits(&self, sub_id: &str, new_rate_limits: RateLimitSnapshot) {
    {
        let mut state = self.state.lock().await;
        state.set_rate_limits(new_rate_limits);
    }
    self.send_token_count_event(sub_id).await;
}
async fn send_token_count_event(&self, sub_id: &str) {
    let (info, rate_limits) = {
        let state = self.state.lock().await;
        state.token_info_and_rate_limits()
    };
    let event = Event {
        id: sub_id.to_string(),
        msg: EventMsg::TokenCount(TokenCountEvent { info, rate_limits }),
    };
    self.send_event(event).await;
}
/// Record a user input item to conversation history and also persist a
/// corresponding UserMessage EventMsg to rollout.
async fn record_input_and_rollout_usermsg(&self, response_input: &ResponseInputItem) {
    let response_item: ResponseItem = response_input.clone().into();
    // Add to conversation history and persist the response item to rollout.
    self.record_conversation_items(std::slice::from_ref(&response_item))
        .await;
    // Derive user message events and persist only UserMessage to rollout.
    let msgs =
        map_response_item_to_event_messages(&response_item, self.show_raw_agent_reasoning());
    let user_msgs: Vec<RolloutItem> = msgs
        .into_iter()
        .filter_map(|m| match m {
            EventMsg::UserMessage(ev) => Some(RolloutItem::EventMsg(EventMsg::UserMessage(ev))),
            _ => None,
        })
        .collect();
    if !user_msgs.is_empty() {
        self.persist_rollout_items(&user_msgs).await;
    }
}
fix: ensure PatchApplyBeginEvent and PatchApplyEndEvent are dispatched reliably (#1760)
This is a follow-up to https://github.com/openai/codex/pull/1705, as
that PR inadvertently lost the logic where `PatchApplyBeginEvent` and
`PatchApplyEndEvent` were sent when patches were auto-approved. As part
of this fix, I believe we also make an important safety fix to
`assess_patch_safety()`, as there was a case that returned
`SandboxType::None`, which is arguably the very thing we were trying to
avoid in #1705.
At a high level, we want there to be only one codepath where
`apply_patch` happens, unified with the path that runs `exec` in
general, so that sandboxing is applied consistently in both cases.
Prior to this change, `apply_patch()` in `core` would either:
* exit early, delegating to `exec()` to shell out to `apply_patch` using
the appropriate sandbox, or
* proceed to run the logic for `apply_patch` in memory
https://github.com/openai/codex/blob/549846b29ad52f6cb4f8560365a731966054a9b3/codex-rs/core/src/apply_patch.rs#L61-L63
Only the latter would dispatch `PatchApplyBeginEvent` and
`PatchApplyEndEvent`; the former would dispatch `ExecCommandBeginEvent`
and `ExecCommandEndEvent` for the `apply_patch` call (or, more
specifically, the `codex --codex-run-as-apply-patch PATCH` call).
To unify things in this PR, we:
* Eliminate the back half of the `apply_patch()` function and instead
have it also return `DelegateToExec`, with an extra field added to the
return value: `user_explicitly_approved_this_action`.
* In `codex.rs`, where we process `DelegateToExec`, we use
`SandboxType::None` when `user_explicitly_approved_this_action` is
`true`. This means **we no longer run the apply_patch logic in memory**,
as we always `exec()`. (This is what allowed us to delete so much code
in `apply_patch.rs`.)
* In `codex.rs`, we further update `notify_exec_command_begin()` and
`notify_exec_command_end()` to take additional fields that determine
which type of notification to send: `ExecCommand` or `PatchApply`.
Admittedly, this PR also drops some of the functionality that gave the
user the opportunity to expand the set of writable roots as part of
approving the `apply_patch` command. I'm not sure how much that was
used, and we should probably rethink how it works as we tidy up the
protocol to the TUI in general.
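As a rough sketch of the approval-to-sandbox wiring described above, under simplified assumptions: the helper `sandbox_for_apply_patch` and the exact shapes of `SandboxType` and `DelegateToExec` are illustrative stand-ins, not the crate's real definitions.

```rust
/// Simplified stand-ins for the types named in the message above.
enum SandboxType {
    None,
    Default,
}

struct DelegateToExec {
    user_explicitly_approved_this_action: bool,
    // ...patch payload and writable roots elided
}

/// If the user explicitly approved this exact patch, run it unsandboxed;
/// otherwise apply the configured sandbox. Either way, the work flows
/// through the single `exec()` codepath.
fn sandbox_for_apply_patch(d: &DelegateToExec, configured: SandboxType) -> SandboxType {
    if d.user_explicitly_approved_this_action {
        SandboxType::None
    } else {
        configured
    }
}
```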
async fn on_exec_command_begin(
    &self,
    turn_diff_tracker: &mut TurnDiffTracker,
    exec_command_context: ExecCommandContext,
) {
    let ExecCommandContext {
        sub_id,
        call_id,
        command_for_display,
        cwd,
        apply_patch,
    } = exec_command_context;
    let msg = match apply_patch {
        Some(ApplyPatchCommandContext {
            user_explicitly_approved_this_action,
            changes,
        }) => {
            turn_diff_tracker.on_patch_begin(&changes);
            EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
                call_id,
                auto_approved: !user_explicitly_approved_this_action,
                changes,
            })
        }
        None => EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
            call_id,
            command: command_for_display.clone(),
            cwd,
            parsed_cmd: parse_command(&command_for_display)
                .into_iter()
                .map(Into::into)
                .collect(),
        }),
    };
    let event = Event {
        id: sub_id.to_string(),
        msg,
    };
    self.send_event(event).await;
}
async fn on_exec_command_end(
    &self,
    turn_diff_tracker: &mut TurnDiffTracker,
    sub_id: &str,
    call_id: &str,
    output: &ExecToolCallOutput,
    is_apply_patch: bool,
) {
    let ExecToolCallOutput {
        stdout,
        stderr,
        aggregated_output,
        duration,
        exit_code,
        timed_out: _,
    } = output;
    // Send full stdout/stderr to clients; do not truncate.
    let stdout = stdout.text.clone();
    let stderr = stderr.text.clone();
    let formatted_output = format_exec_output_str(output);
    let aggregated_output: String = aggregated_output.text.clone();
    let msg = if is_apply_patch {
        EventMsg::PatchApplyEnd(PatchApplyEndEvent {
            call_id: call_id.to_string(),
            stdout,
            stderr,
            success: *exit_code == 0,
        })
    } else {
        EventMsg::ExecCommandEnd(ExecCommandEndEvent {
            call_id: call_id.to_string(),
            stdout,
            stderr,
            aggregated_output,
            exit_code: *exit_code,
            duration: *duration,
            formatted_output,
        })
    };
    let event = Event {
        id: sub_id.to_string(),
        msg,
    };
    self.send_event(event).await;
    // If this is an apply_patch, after we emit the patch-end event, emit a
    // second event with the full turn diff if there is one.
    if is_apply_patch {
        let unified_diff = turn_diff_tracker.get_unified_diff();
        if let Ok(Some(unified_diff)) = unified_diff {
            let msg = EventMsg::TurnDiff(TurnDiffEvent { unified_diff });
            let event = Event {
                id: sub_id.into(),
                msg,
            };
            self.send_event(event).await;
        }
    }
}
/// Runs the exec tool call and emits events for the begin and end of the
/// command, even on error.
///
/// Returns the output of the exec tool call.
async fn run_exec_with_events<'a>(
    &self,
    turn_diff_tracker: &mut TurnDiffTracker,
    begin_ctx: ExecCommandContext,
    exec_args: ExecInvokeArgs<'a>,
) -> crate::error::Result<ExecToolCallOutput> {
    let is_apply_patch = begin_ctx.apply_patch.is_some();
    let sub_id = begin_ctx.sub_id.clone();
    let call_id = begin_ctx.call_id.clone();
    self.on_exec_command_begin(turn_diff_tracker, begin_ctx.clone())
        .await;
    let result = process_exec_tool_call(
        exec_args.params,
        exec_args.sandbox_type,
        exec_args.sandbox_policy,
        exec_args.sandbox_cwd,
        exec_args.codex_linux_sandbox_exe,
        exec_args.stdout_stream,
    )
    .await;
    let output_stderr;
    let borrowed: &ExecToolCallOutput = match &result {
        Ok(output) => output,
        Err(CodexErr::Sandbox(SandboxErr::Timeout { output })) => output,
        Err(e) => {
            output_stderr = ExecToolCallOutput {
                exit_code: -1,
                stdout: StreamOutput::new(String::new()),
                stderr: StreamOutput::new(get_error_message_ui(e)),
                aggregated_output: StreamOutput::new(get_error_message_ui(e)),
                duration: Duration::default(),
                timed_out: false,
            };
            &output_stderr
        }
    };
    self.on_exec_command_end(
        turn_diff_tracker,
        &sub_id,
        &call_id,
        borrowed,
        is_apply_patch,
    )
    .await;
    result
}
/// Helper that emits a BackgroundEvent with the given message. This keeps
/// the call sites terse so adding more diagnostics does not clutter the
/// core agent logic.
async fn notify_background_event(&self, sub_id: &str, message: impl Into<String>) {
    let event = Event {
        id: sub_id.to_string(),
        msg: EventMsg::BackgroundEvent(BackgroundEventEvent {
            message: message.into(),
        }),
    };
    self.send_event(event).await;
}
async fn notify_stream_error(&self, sub_id: &str, message: impl Into<String>) {
    let event = Event {
        id: sub_id.to_string(),
        msg: EventMsg::StreamError(StreamErrorEvent {
            message: message.into(),
        }),
    };
    self.send_event(event).await;
}
/// Build the full turn input by concatenating the current conversation
/// history with additional items for this turn.
pub async fn turn_input_with_history(&self, extra: Vec<ResponseItem>) -> Vec<ResponseItem> {
    let history = {
        let state = self.state.lock().await;
        state.history_snapshot()
    };
    [history, extra].concat()
}
    /// Attempts to inject input into the current task; returns the input
    /// unchanged as an `Err` if there is no task running to inject into.
    pub async fn inject_input(&self, input: Vec<InputItem>) -> Result<(), Vec<InputItem>> {
        let mut active = self.active_turn.lock().await;
        match active.as_mut() {
            Some(at) => {
                let mut ts = at.turn_state.lock().await;
                ts.push_pending_input(input.into());
                Ok(())
            }
            None => Err(input),
        }
    }

    pub async fn get_pending_input(&self) -> Vec<ResponseInputItem> {
        let mut active = self.active_turn.lock().await;
        match active.as_mut() {
            Some(at) => {
                let mut ts = at.turn_state.lock().await;
                ts.take_pending_input()
            }
            None => Vec::new(),
        }
    }
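
Continuing the sketch above: a plausible shape for the pending-input queue that `inject_input` and `get_pending_input` drive. The method names come from the calls in this file; the implementation is assumed:

```rust
impl TurnState {
    /// Queue input submitted while the turn is in flight.
    pub fn push_pending_input(&mut self, input: ResponseInputItem) {
        self.pending_input.push(input);
    }

    /// Drain the queue so each pending item is delivered exactly once.
    pub fn take_pending_input(&mut self) -> Vec<ResponseInputItem> {
        std::mem::take(&mut self.pending_input)
    }
}
```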
fix: chat completions API now also passes tools along (#1167)
Prior to this PR, there were two big misses in `chat_completions.rs`:
1. The loop in `stream_chat_completions()` was only including items of
type `ResponseItem::Message` when building up the `"messages"` JSON for
the `POST` request to the `chat/completions` endpoint. This fixes things
by ensuring other variants (`FunctionCall`, `LocalShellCall`, and
`FunctionCallOutput`) are included as well.
2. In `process_chat_sse()`, we were not recording tool calls and were
only emitting items of type
`ResponseEvent::OutputItemDone(ResponseItem::Message)` to the stream.
Now we introduce `FunctionCallState`, which is used to accumulate the
`delta`s of type `tool_calls`, so we can ultimately emit a
`ResponseItem::FunctionCall`, when appropriate. (A sketch of this
accumulator follows the `call_tool` method below.)
While function calling now appears to work for chat completions with my
local testing, I believe there are still edge cases that are not
covered and that this codepath would benefit from a battery of
integration tests. (As part of that further cleanup, we should also work
to support streaming responses in the UI.)
The other important part of this PR is some cleanup in
`core/src/codex.rs`. In particular, it was hard to reason about how
`run_task()` was building up the list of messages to include in a
request across the various cases:
- Responses API
- Chat Completions API
- Responses API used in concert with ZDR
I like to think things are a bit cleaner now where:
- `zdr_transcript` (if present) contains all messages in the history of
the conversation, which includes function call outputs that have not
been sent back to the model yet
- `pending_input` includes any messages the user has submitted while the
turn is in flight that need to be injected as part of the next `POST` to
the model
- `input_for_next_turn` includes the tool call outputs that have not
been sent back to the model yet

    pub async fn call_tool(
        &self,
        server: &str,
        tool: &str,
        arguments: Option<serde_json::Value>,
    ) -> anyhow::Result<CallToolResult> {
        self.services
            .mcp_connection_manager
            .call_tool(server, tool, arguments)
            .await
    }
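
The chat-completions note above introduces `FunctionCallState` for accumulating streamed `tool_calls` deltas. A hedged sketch of that accumulator (the real one lives in `chat_completions.rs`; the field and method names here are assumptions):

```rust
/// Accumulates streamed `tool_calls` deltas until the function call is complete.
#[derive(Default)]
struct FunctionCallState {
    name: Option<String>,
    call_id: Option<String>,
    arguments: String, // JSON argument fragments, concatenated in arrival order
}

impl FunctionCallState {
    fn apply_delta(&mut self, name: Option<&str>, call_id: Option<&str>, args: Option<&str>) {
        if let Some(n) = name {
            self.name.get_or_insert_with(|| n.to_string());
        }
        if let Some(id) = call_id {
            self.call_id.get_or_insert_with(|| id.to_string());
        }
        if let Some(a) = args {
            self.arguments.push_str(a);
        }
    }
}
```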

    pub async fn interrupt_task(self: &Arc<Self>) {
        info!("interrupt received: abort current task, if any");
        self.abort_all_tasks(TurnAbortReason::Interrupted).await;
    }
feat: configurable notifications in the Rust CLI (#793)
With this change, you can specify a program that will be executed to get
notified about events generated by Codex. The notification info will be
packaged as a JSON object. The supported notification types are defined
by the `UserNotification` enum introduced in this PR. Initially, it
contains only one variant, `AgentTurnComplete`:
```rust
pub(crate) enum UserNotification {
    #[serde(rename_all = "kebab-case")]
    AgentTurnComplete {
        turn_id: String,
        /// Messages that the user sent to the agent to initiate the turn.
        input_messages: Vec<String>,
        /// The last message sent by the assistant in the turn.
        last_assistant_message: Option<String>,
    },
}
```
This is intended to support the common case when a "turn" ends, which
often means it is now your chance to give Codex further instructions.
For example, I have the following in my `~/.codex/config.toml`:
```toml
notify = ["python3", "/Users/mbolin/.codex/notify.py"]
```
I created my own custom notifier script that calls out to
[terminal-notifier](https://github.com/julienXX/terminal-notifier) to
show a desktop push notification on macOS. Contents of `notify.py`:
```python
#!/usr/bin/env python3
import json
import subprocess
import sys


def main() -> int:
    if len(sys.argv) != 2:
        print("Usage: notify.py <NOTIFICATION_JSON>")
        return 1

    try:
        notification = json.loads(sys.argv[1])
    except json.JSONDecodeError:
        return 1

    match notification_type := notification.get("type"):
        case "agent-turn-complete":
            assistant_message = notification.get("last-assistant-message")
            if assistant_message:
                title = f"Codex: {assistant_message}"
            else:
                title = "Codex: Turn Complete!"
            input_messages = notification.get("input_messages", [])
            message = " ".join(input_messages)
            title += message
        case _:
            print(f"not sending a push notification for: {notification_type}")
            return 0

    subprocess.check_output(
        [
            "terminal-notifier",
            "-title",
            title,
            "-message",
            message,
            "-group",
            "codex",
            "-ignoreDnD",
            "-activate",
            "com.googlecode.iterm2",
        ]
    )
    return 0


if __name__ == "__main__":
    sys.exit(main())
```
For reference, here are related PRs that tried to add this functionality
to the TypeScript version of the Codex CLI:
* https://github.com/openai/codex/pull/160
* https://github.com/openai/codex/pull/498
    fn interrupt_task_sync(&self) {
        if let Ok(mut active) = self.active_turn.try_lock()
            && let Some(at) = active.as_mut()
        {
            at.try_clear_pending_sync();
            let tasks = at.drain_tasks();
            *active = None;
            for (_sub_id, task) in tasks {
                task.handle.abort();
            }
        }
    }

    pub(crate) fn notifier(&self) -> &UserNotifier {
        &self.services.notifier
    }

    fn user_shell(&self) -> &shell::Shell {
        &self.services.user_shell
    }

    fn show_raw_agent_reasoning(&self) -> bool {
        self.services.show_raw_agent_reasoning
    }
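
How the notifier hands events to the user-configured program is not shown in this file. A minimal sketch, assuming `UserNotifier` stores the `notify` argv from `config.toml` in a field (the field name and method are hypothetical):

```rust
impl UserNotifier {
    fn notify(&self, notification: &UserNotification) {
        // `notify_command` is a hypothetical field holding e.g.
        // ["python3", "/Users/mbolin/.codex/notify.py"].
        let Some(argv) = self.notify_command.as_deref() else {
            return;
        };
        let Some((program, args)) = argv.split_first() else {
            return;
        };
        if let Ok(json) = serde_json::to_string(notification) {
            // Fire and forget: a broken notifier must not fail the turn.
            let _ = std::process::Command::new(program).args(args).arg(json).spawn();
        }
    }
}
```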
}
impl Drop for Session {
    fn drop(&mut self) {
        self.interrupt_task_sync();
    }
}
fix: ensure PatchApplyBeginEvent and PatchApplyEndEvent are dispatched reliably (#1760)
This is a follow-up to https://github.com/openai/codex/pull/1705, as
that PR inadvertently lost the logic where `PatchApplyBeginEvent` and
`PatchApplyEndEvent` events were sent when patches were auto-approved.
As part of this fix, I believe this also makes an important
safety fix to `assess_patch_safety()`, as there was a case that returned
`SandboxType::None`, which arguably is the thing we were trying to avoid
in #1705.
On a high level, we want there to be only one codepath where
`apply_patch` happens, which should be unified with the path used to run
`exec` in general, so that sandboxing is applied consistently for both
cases.
Prior to this change, `apply_patch()` in `core` would either:
* exit early, delegating to `exec()` to shell out to `apply_patch` using
the appropriate sandbox
* proceed to run the logic for `apply_patch` in memory
https://github.com/openai/codex/blob/549846b29ad52f6cb4f8560365a731966054a9b3/codex-rs/core/src/apply_patch.rs#L61-L63
In this implementation, only the latter would dispatch
`PatchApplyBeginEvent` and `PatchApplyEndEvent`, though the former would
dispatch `ExecCommandBeginEvent` and `ExecCommandEndEvent` for the
`apply_patch` call (or, more specifically, the `codex
--codex-run-as-apply-patch PATCH` call).
To unify things in this PR, we:
* Eliminate the back half of the `apply_patch()` function, and instead
have it also return with `DelegateToExec`, though we add an extra field
to the return value, `user_explicitly_approved_this_action`.
* In `codex.rs` where we process `DelegateToExec`, we use
`SandboxType::None` when `user_explicitly_approved_this_action` is
`true`. This means **we no longer run the apply_patch logic in memory**,
as we always `exec()`. (Note this is what allowed us to delete so much
code in `apply_patch.rs`.)
* In `codex.rs`, we further update `notify_exec_command_begin()` and
`notify_exec_command_end()` to take additional fields to determine what
type of notification to send: `ExecCommand` or `PatchApply`.
Admittedly, this PR also drops some of the functionality for giving
the user the opportunity to expand the set of writable roots as part of
approving the `apply_patch` command. I'm not sure how much that was
used, and we should probably rethink how that works as we are currently
tidying up the protocol to the TUI in general.
#[derive(Clone, Debug)]
pub(crate) struct ExecCommandContext {
    pub(crate) sub_id: String,
    pub(crate) call_id: String,
    pub(crate) command_for_display: Vec<String>,
    pub(crate) cwd: PathBuf,
    pub(crate) apply_patch: Option<ApplyPatchCommandContext>,
}

#[derive(Clone, Debug)]
pub(crate) struct ApplyPatchCommandContext {
    pub(crate) user_explicitly_approved_this_action: bool,
    pub(crate) changes: HashMap<PathBuf, FileChange>,
}
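
The commit message above maps explicit user approval to an unsandboxed run. A sketch of that decision (the helper name is hypothetical; `SandboxType` is the enum referenced in the note):

```rust
fn sandbox_for_apply_patch(
    ctx: &ApplyPatchCommandContext,
    default_sandbox: SandboxType,
) -> SandboxType {
    if ctx.user_explicitly_approved_this_action {
        // The user already saw and approved this exact patch, so run it
        // without a sandbox, mirroring the behavior described above.
        SandboxType::None
    } else {
        default_sandbox
    }
}
```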

chore: introduce ConversationManager as a clearinghouse for all conversations (#2240)
This PR does two things because after I got deep into the first one I
started pulling on the thread to the second:
- Makes `ConversationManager` the place where all in-memory
conversations are created and stored. Previously, `MessageProcessor` in
the `codex-mcp-server` crate was doing this via its `session_map`, but
this is something that should be done in `codex-core`.
- It unwinds the `ctrl_c: tokio::sync::Notify` that was threaded
throughout our code. I think this made sense at one time, but now that
we handle Ctrl-C within the TUI and have a proper `Op::Interrupt` event,
I don't think this was quite right, so I removed it. For `codex exec`
and `codex proto`, we now use `tokio::signal::ctrl_c()` directly, but we
no longer make `Notify` a field of `Codex` or `CodexConversation`.
Changes of note:
- Adds the files `conversation_manager.rs` and `codex_conversation.rs`
to `codex-core`.
- `Codex` and `CodexSpawnOk` are no longer exported from `codex-core`:
other crates must use `CodexConversation` instead (which is created via
`ConversationManager`).
- `core/src/codex_wrapper.rs` has been deleted in favor of
`ConversationManager`.
- `ConversationManager::new_conversation()` returns `NewConversation`,
which is in line with the `new_conversation` tool we want to add to the
MCP server. Note `NewConversation` includes `SessionConfiguredEvent`, so
we eliminate checks in cases like `codex-rs/core/tests/client.rs` that
verify `SessionConfiguredEvent` is the first event, because that is now
internal to `ConversationManager`.
- Quite a bit of code was deleted from
`codex-rs/mcp-server/src/message_processor.rs` since it no longer has to
manage multiple conversations itself: it goes through
`ConversationManager` instead.
- `core/tests/live_agent.rs` has been deleted because I had to update a
bunch of tests, all the tests in it were ignored, and I don't think
anyone ever ran them, so it was just technical debt at this point.
- Removed `notify_on_sigint()` from `util.rs` (and in a follow-up, I
hope to refactor the blandly-named `util.rs` into more descriptive
files).
- In general, I started renaming local variables from `codex` to
`conversation`, where appropriate, though admittedly I didn't do it
through all the integration tests because that would have added a lot of
noise to this PR.

async fn submission_loop(
    sess: Arc<Session>,
    turn_context: TurnContext,
    config: Arc<Config>,
    rx_sub: Receiver<Submission>,
) {
    // Wrap once to avoid cloning TurnContext for each task.
    let mut turn_context = Arc::new(turn_context);
    // To break out of this loop, send Op::Shutdown.
    while let Ok(sub) = rx_sub.recv().await {
        debug!(?sub, "Submission");
        match sub.op {
            Op::Interrupt => {
                sess.interrupt_task().await;
            }
            Op::OverrideTurnContext {
                cwd,
                approval_policy,
                sandbox_policy,
                model,
                effort,
                summary,
            } => {
                // Recalculate the persistent turn context with provided overrides.
                let prev = Arc::clone(&turn_context);
                let provider = prev.client.get_provider();

                // Effective model + family
                let (effective_model, effective_family) = if let Some(ref m) = model {
                    let fam =
                        find_family_for_model(m).unwrap_or_else(|| config.model_family.clone());
                    (m.clone(), fam)
                } else {
                    (prev.client.get_model(), prev.client.get_model_family())
                };

                // Effective reasoning settings
                let effective_effort = effort.unwrap_or(prev.client.get_reasoning_effort());
                let effective_summary = summary.unwrap_or(prev.client.get_reasoning_summary());
                let auth_manager = prev.client.get_auth_manager();

                // Build updated config for the client
                let mut updated_config = (*config).clone();
                updated_config.model = effective_model.clone();
                updated_config.model_family = effective_family.clone();
                if let Some(model_info) = get_model_info(&effective_family) {
                    updated_config.model_context_window = Some(model_info.context_window);
                }
                let otel_event_manager = prev.client.get_otel_event_manager().with_model(
                    updated_config.model.as_str(),
                    updated_config.model_family.slug.as_str(),
                );
                let client = ModelClient::new(
                    Arc::new(updated_config),
                    auth_manager,
                    otel_event_manager,
                    provider,
                    effective_effort,
                    effective_summary,
                    sess.conversation_id,
                );
                let new_approval_policy = approval_policy.unwrap_or(prev.approval_policy);
                let new_sandbox_policy = sandbox_policy
                    .clone()
                    .unwrap_or(prev.sandbox_policy.clone());
                let new_cwd = cwd.clone().unwrap_or_else(|| prev.cwd.clone());
Unified execution (#3288)
## Unified PTY-Based Exec Tool
Note: this requires the following flag in the config:
`use_experimental_unified_exec_tool = true`
- Adds a PTY-backed interactive exec feature ("unified_exec") with
session reuse via `session_id`, bounded output (128 KiB), and timeout
clamping (≤ 60 s). A sketch of these limits follows the `ToolsConfig`
setup below.
- Protocol: introduces `ResponseItem::UnifiedExec { session_id,
arguments, timeout_ms }`.
- Tools: exposes `unified_exec` as a function tool (Responses API);
excluded from the Chat Completions payload while still supported in
tool lists.
- Path handling: resolves commands via PATH (or explicit paths), with
UTF-8/newline-aware truncation (`truncate_middle`).
- Tests: cover command parsing, path resolution, session
persistence/cleanup, multi-session isolation, timeouts, and truncation
behavior.

                let tools_config = ToolsConfig::new(&ToolsConfigParams {
                    model_family: &effective_family,
                    include_plan_tool: config.include_plan_tool,
                    include_apply_patch_tool: config.include_apply_patch_tool,
                    include_web_search_request: config.tools_web_search_request,
                    use_streamable_shell_tool: config.use_experimental_streamable_shell_tool,
                    include_view_image_tool: config.include_view_image_tool,
                    experimental_unified_exec_tool: config.use_experimental_unified_exec_tool,
                });
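
A sketch of the unified_exec limits listed in the note above (output bounded at 128 KiB, timeouts clamped to 60 s); the constant and function names are assumptions:

```rust
use std::time::Duration;

const UNIFIED_EXEC_MAX_TIMEOUT: Duration = Duration::from_secs(60);
const UNIFIED_EXEC_MAX_OUTPUT_BYTES: usize = 128 * 1024;

/// Clamp a caller-supplied timeout to the 60 s ceiling, defaulting when absent.
fn clamp_timeout(requested_ms: Option<u64>) -> Duration {
    requested_ms
        .map(Duration::from_millis)
        .map_or(UNIFIED_EXEC_MAX_TIMEOUT, |d| d.min(UNIFIED_EXEC_MAX_TIMEOUT))
}
```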

Review Mode (Core) (#3401)
## 📝 Review Mode -- Core
This PR introduces the Core implementation for Review mode:
- New op `Op::Review { prompt: String }`: spawns a child review task
with isolated context, a review-specific system prompt, and a
`Config.review_model`.
- `EnteredReviewMode`: emitted when the child review session starts.
Every event from this point onwards reflects the review session.
- `ExitedReviewMode(Option<ReviewOutputEvent>)`: emitted when the review
finishes or is interrupted, with optional structured findings:
```json
{
  "findings": [
    {
      "title": "<≤ 80 chars, imperative>",
      "body": "<valid Markdown explaining *why* this is a problem; cite files/lines/functions>",
      "confidence_score": <float 0.0-1.0>,
      "priority": <int 0-3>,
      "code_location": {
        "absolute_file_path": "<file path>",
        "line_range": {"start": <int>, "end": <int>}
      }
    }
  ],
  "overall_correctness": "patch is correct" | "patch is incorrect",
  "overall_explanation": "<1-3 sentence explanation justifying the overall_correctness verdict>",
  "overall_confidence_score": <float 0.0-1.0>
}
```
## Questions
### Why separate out its own message history?
We want the review thread to match the training of our review models as
much as possible -- that means using a custom prompt, removing user
instructions, and starting a clean chat history.
We also want to make sure the review thread doesn't leak into the parent
thread.
### Why do this as a mode, vs. sub-agents?
1. We want review to be a synchronous task, so it's fine for now to do a
bespoke implementation.
2. We're still unclear about the final structure for sub-agents. We'd
prefer to land this quickly and then refactor into sub-agents without
rushing that implementation.

                let new_turn_context = TurnContext {
                    client,
                    tools_config,
                    user_instructions: prev.user_instructions.clone(),
                    base_instructions: prev.base_instructions.clone(),
                    approval_policy: new_approval_policy,
                    sandbox_policy: new_sandbox_policy.clone(),
                    shell_environment_policy: prev.shell_environment_policy.clone(),
                    cwd: new_cwd.clone(),
                    is_review_mode: false,
                    final_output_json_schema: None,
                };
                // Install the new persistent context for subsequent tasks/turns.
                turn_context = Arc::new(new_turn_context);

                // Optionally persist changes to model / effort
                if cwd.is_some() || approval_policy.is_some() || sandbox_policy.is_some() {
                    sess.record_conversation_items(&[ResponseItem::from(EnvironmentContext::new(
                        cwd,
                        approval_policy,
                        sandbox_policy,
                        // Shell is not configurable from turn to turn
                        None,
                    ))])
                    .await;
                }
            }
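
For illustration, a submission this arm would handle: it swaps the model and working directory mid-conversation and leaves everything else unchanged. All values are placeholders, not real defaults:

```rust
let override_op = Op::OverrideTurnContext {
    cwd: Some(PathBuf::from("/work/repo")),   // placeholder path
    approval_policy: None,                    // keep the current policy
    sandbox_policy: None,                     // keep the current sandbox
    model: Some("example-model".to_string()), // placeholder model slug
    effort: None,
    summary: None,
};
```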
            Op::UserInput { items } => {
                turn_context
                    .client
                    .get_otel_event_manager()
                    .user_prompt(&items);
                // attempt to inject input into current task
                if let Err(items) = sess.inject_input(items).await {
                    // no current task, spawn a new one
                    sess.spawn_task(Arc::clone(&turn_context), sub.id, items, RegularTask)
                        .await;
                }
            }
            Op::UserTurn {
                items,
                cwd,
                approval_policy,
                sandbox_policy,
                model,
                effort,
                summary,
                final_output_json_schema,
            } => {
OpenTelemetry events (#2103)
### Title
## otel
Codex can emit [OpenTelemetry](https://opentelemetry.io/) **log events**
that
describe each run: outbound API requests, streamed responses, user
input,
tool-approval decisions, and the result of every tool invocation. Export
is
**disabled by default** so local runs remain self-contained. Opt in by
adding an
`[otel]` table and choosing an exporter.
```toml
[otel]
environment = "staging" # defaults to "dev"
exporter = "none" # defaults to "none"; set to otlp-http or otlp-grpc to send events
log_user_prompt = false # defaults to false; redact prompt text unless explicitly enabled
```
Codex tags every exported event with `service.name = "codex-cli"`, the
CLI
version, and an `env` attribute so downstream collectors can distinguish
dev/staging/prod traffic. Only telemetry produced inside the
`codex_otel`
crate—the events listed below—is forwarded to the exporter.
### Event catalog
Every event shares a common set of metadata fields: `event.timestamp`,
`conversation.id`, `app.version`, `auth_mode` (when available),
`user.account_id` (when available), `terminal.type`, `model`, and
`slug`.
With OTEL enabled Codex emits the following event types (in addition to
the
metadata above):
- `codex.api_request`
- `cf_ray` (optional)
- `attempt`
- `duration_ms`
- `http.response.status_code` (optional)
- `error.message` (failures)
- `codex.sse_event`
- `event.kind`
- `duration_ms`
- `error.message` (failures)
- `input_token_count` (completion only)
- `output_token_count` (completion only)
- `cached_token_count` (completion only, optional)
- `reasoning_token_count` (completion only, optional)
- `tool_token_count` (completion only)
- `codex.user_prompt`
- `prompt_length`
- `prompt` (redacted unless `log_user_prompt = true`)
- `codex.tool_decision`
- `tool_name`
- `call_id`
- `decision` (`approved`, `approved_for_session`, `denied`, or `abort`)
- `source` (`config` or `user`)
- `codex.tool_result`
- `tool_name`
- `call_id`
- `arguments`
- `duration_ms` (execution time for the tool)
- `success` (`"true"` or `"false"`)
- `output`
### Choosing an exporter
Set `otel.exporter` to control where events go:
- `none` – leaves instrumentation active but skips exporting. This is the
  default.
- `otlp-http` – posts OTLP log records to an OTLP/HTTP collector. Specify the
  endpoint, protocol, and headers your collector expects:
```toml
[otel]
exporter = { otlp-http = {
endpoint = "https://otel.example.com/v1/logs",
protocol = "binary",
headers = { "x-otlp-api-key" = "${OTLP_TOKEN}" }
}}
```
- `otlp-grpc` – streams OTLP log records over gRPC. Provide the endpoint and
  any metadata headers:
```toml
[otel]
exporter = { otlp-grpc = {
endpoint = "https://otel.example.com:4317",
headers = { "x-otlp-meta" = "abc123" }
}}
```
If the exporter is `none`, nothing is written anywhere; otherwise you must run
or point to your own collector. All exporters run on a background batch worker
that is flushed on shutdown.
If you build Codex from source, the OTEL crate is still behind an `otel`
feature flag; the official prebuilt binaries ship with the feature enabled.
When the feature is disabled, the telemetry hooks become no-ops so the CLI
continues to function without the extra dependencies.
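For illustration, a minimal sketch of how a feature-gated hook can degrade to
a no-op; the function name and its body are assumptions, not the actual
`codex_otel` API:
```rust
// Sketch only. With the `otel` feature off, the hook compiles to an empty
// body, so call sites need no cfg knowledge of their own.
#[cfg(feature = "otel")]
pub fn record_tool_decision(tool_name: &str, decision: &str) {
    // Real code would hand a log record to the OTLP batch worker; a
    // stand-in body keeps this sketch self-contained.
    eprintln!("codex.tool_decision tool_name={tool_name} decision={decision}");
}

#[cfg(not(feature = "otel"))]
pub fn record_tool_decision(_tool_name: &str, _decision: &str) {}
```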
---------
Co-authored-by: Anton Panasenko <apanasenko@openai.com>
2025-09-29 19:30:55 +01:00
turn_context
    .client
    .get_otel_event_manager()
    .user_prompt(&items);
2025-08-15 09:56:05 -07:00
// attempt to inject input into current task
2025-09-18 18:21:52 +01:00
if let Err(items) = sess.inject_input(items).await {
2025-08-15 09:56:05 -07:00
// Derive a fresh TurnContext for this turn using the provided overrides.
let provider = turn_context.client.get_provider();
2025-08-25 15:57:20 -07:00
let auth_manager = turn_context.client.get_auth_manager();
2025-08-15 09:56:05 -07:00
// Derive a model family for the requested model; fall back to the session's.
let model_family = find_family_for_model(&model)
    .unwrap_or_else(|| config.model_family.clone());
// Create a per-turn Config clone with the requested model/family.
let mut per_turn_config = (*config).clone();
per_turn_config.model = model.clone();
per_turn_config.model_family = model_family.clone();
2025-08-27 00:04:21 -07:00
if let Some(model_info) = get_model_info(&model_family) {
    per_turn_config.model_context_window = Some(model_info.context_window);
}
2025-08-15 09:56:05 -07:00
let otel_event_manager =
    turn_context.client.get_otel_event_manager().with_model(
        per_turn_config.model.as_str(),
        per_turn_config.model_family.slug.as_str(),
    );
2025-08-15 09:56:05 -07:00
// Build a new client with per-turn reasoning settings.
// Reuse the same provider and session id; auth defaults to env/API key.
let client = ModelClient::new(
    Arc::new(per_turn_config),
2025-08-25 15:57:20 -07:00
    auth_manager,
    otel_event_manager,
2025-08-15 09:56:05 -07:00
    provider,
2025-08-18 11:50:17 -07:00
    effort,
    summary,
2025-09-07 20:22:25 -07:00
    sess.conversation_id,
2025-08-15 09:56:05 -07:00
);
let fresh_turn_context = TurnContext {
    client,
2025-08-24 22:43:42 -07:00
    tools_config: ToolsConfig::new(&ToolsConfigParams {
        model_family: &model_family,
        include_plan_tool: config.include_plan_tool,
        include_apply_patch_tool: config.include_apply_patch_tool,
        include_web_search_request: config.tools_web_search_request,
        use_streamable_shell_tool: config
            .use_experimental_streamable_shell_tool,
2025-08-27 17:41:23 -07:00
        include_view_image_tool: config.include_view_image_tool,
Unified execution (#3288)
## Unified PTY-Based Exec Tool
Note: this requires the following flag in the config:
`use_experimental_unified_exec_tool=true`
- Adds a PTY-backed interactive exec feature (“unified_exec”) with session
  reuse via session_id, bounded output (128 KiB), and timeout clamping
  (≤ 60 s; both limits are sketched below).
- Protocol: introduces ResponseItem::UnifiedExec { session_id, arguments,
  timeout_ms }.
- Tools: exposes unified_exec as a function tool (Responses API); excluded
  from Chat Completions payload while still supported in tool lists.
- Path handling: resolves commands via PATH (or explicit paths), with
  UTF‑8/newline‑aware truncation (truncate_middle).
- Tests: cover command parsing, path resolution, session persistence/cleanup,
  multi‑session isolation, timeouts, and truncation behavior.
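A sketch of the bounding and clamping limits above; the constant names and
this simplified `truncate_middle` (UTF-8-aware only, while the real helper is
also newline-aware) are assumptions, not the codex-rs implementation:
```rust
const MAX_OUTPUT_BYTES: usize = 128 * 1024; // bounded output (128 KiB)
const MAX_TIMEOUT_MS: u64 = 60_000; // timeout clamping (<= 60 s)

// A missing timeout falls back to the cap; an oversized one is clamped.
fn clamp_timeout(requested_ms: Option<u64>) -> u64 {
    requested_ms.unwrap_or(MAX_TIMEOUT_MS).min(MAX_TIMEOUT_MS)
}

// Keep the head and tail of `s`, dropping the middle, so the result fits in
// `max_bytes` without splitting a UTF-8 code point.
fn truncate_middle(s: &str, max_bytes: usize) -> String {
    if s.len() <= max_bytes {
        return s.to_string();
    }
    // Reserve room for the marker, then keep roughly equal head and tail.
    let keep = max_bytes.saturating_sub(24) / 2;
    let mut head_end = keep;
    while !s.is_char_boundary(head_end) {
        head_end -= 1;
    }
    let mut tail_start = s.len() - keep;
    while !s.is_char_boundary(tail_start) {
        tail_start += 1;
    }
    format!("{}\n[...truncated...]\n{}", &s[..head_end], &s[tail_start..])
}
```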
2025-09-10 17:38:11 -07:00
        experimental_unified_exec_tool: config
            .use_experimental_unified_exec_tool,
2025-08-24 22:43:42 -07:00
    }),
2025-08-15 09:56:05 -07:00
    user_instructions: turn_context.user_instructions.clone(),
    base_instructions: turn_context.base_instructions.clone(),
    approval_policy,
    sandbox_policy,
    shell_environment_policy: turn_context.shell_environment_policy.clone(),
    cwd,
Review Mode (Core) (#3401)
## 📝 Review Mode -- Core
This PR introduces the Core implementation for Review mode:
- New op `Op::Review { prompt: String }`: spawns a child review task with
  isolated context, a review‑specific system prompt, and a
  `Config.review_model`.
- `EnteredReviewMode`: emitted when the child review session starts. Every
  event from this point onwards reflects the review session.
- `ExitedReviewMode(Option<ReviewOutputEvent>)`: emitted when the review
  finishes or is interrupted, with optional structured findings:
```json
{
"findings": [
{
"title": "<≤ 80 chars, imperative>",
"body": "<valid Markdown explaining *why* this is a problem; cite files/lines/functions>",
"confidence_score": <float 0.0-1.0>,
"priority": <int 0-3>,
"code_location": {
"absolute_file_path": "<file path>",
"line_range": {"start": <int>, "end": <int>}
}
}
],
"overall_correctness": "patch is correct" | "patch is incorrect",
"overall_explanation": "<1-3 sentence explanation justifying the overall_correctness verdict>",
"overall_confidence_score": <float 0.0-1.0>
}
```
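For reference, a minimal sketch of Rust types that would deserialize this
payload (assuming `serde` with the derive feature); `ReviewOutputEvent` is
named in the events above, while the other struct names are assumptions:
```rust
use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct ReviewOutputEvent {
    findings: Vec<ReviewFinding>,
    overall_correctness: String, // "patch is correct" | "patch is incorrect"
    overall_explanation: String,
    overall_confidence_score: f32,
}

#[derive(Debug, Deserialize)]
struct ReviewFinding {
    title: String,          // <= 80 chars, imperative
    body: String,           // Markdown explaining why this is a problem
    confidence_score: f32,  // 0.0..=1.0
    priority: u8,           // 0..=3
    code_location: CodeLocation,
}

#[derive(Debug, Deserialize)]
struct CodeLocation {
    absolute_file_path: String,
    line_range: LineRange,
}

#[derive(Debug, Deserialize)]
struct LineRange {
    start: u32,
    end: u32,
}
```
With these in place, something like `serde_json::from_str::<ReviewOutputEvent>(payload)`
would parse the structured findings.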
## Questions
### Why separate out its own message history?
We want the review thread to match the training of our review models as
much as possible -- that means using a custom prompt, removing user
instructions, and starting a clean chat history.
We also want to make sure the review thread doesn't leak into the parent
thread.
### Why do this as a mode, vs. sub-agents?
1. We want review to be a synchronous task, so it's fine for now to do a
bespoke implementation.
2. We're still unclear about the final structure for sub-agents. We'd
prefer to land this quickly and then refactor into sub-agents without
rushing that implementation.
2025-09-12 16:25:10 -07:00
    is_review_mode: false,
2025-09-23 13:59:16 -07:00
    final_output_json_schema,
2025-08-15 09:56:05 -07:00
};
2025-09-16 11:32:20 -07:00
// if the environment context has changed, record it in the conversation history
let previous_env_context = EnvironmentContext::from(turn_context.as_ref());
let new_env_context = EnvironmentContext::from(&fresh_turn_context);
if !new_env_context.equals_except_shell(&previous_env_context) {
    sess.record_conversation_items(&[ResponseItem::from(new_env_context)])
        .await;
}
// Install the new persistent context for subsequent tasks/turns.
turn_context = Arc::new(fresh_turn_context);
2025-09-26 15:49:08 +02:00
// no current task, spawn a new one with the per-turn context
sess.spawn_task(Arc::clone(&turn_context), sub.id, items, RegularTask)
    .await;
}
}
exploration: create Session as part of Codex::spawn() (#2291)
Historically, `Codex::spawn()` would create the instance of `Codex` and
enforce, by construction, that `Op::ConfigureSession` was the first `Op`
submitted via `submit()`. Then over in `submission_loop()`, it would
handle the case for taking the parameters of `Op::ConfigureSession` and
turning it into a `Session`.
This approach has two challenges from a state management perspective:
https://github.com/openai/codex/blob/f968a1327ad39a7786759ea8f1d1c088fe41e91b/codex-rs/core/src/codex.rs#L718
- The local `sess` variable in `submission_loop()` has to be `mut` and
`Option<Arc<Session>>` because it is not invariant that a `Session` is
present for the lifetime of the loop, so there is a lot of logic to deal
with the case where `sess` is `None` (e.g., the `send_no_session_event`
function and all of its callsites).
- `submission_loop()` is written in such a way that
`Op::ConfigureSession` could be observed multiple times, but in
practice, it is only observed exactly once at the start of the loop.
In this PR, we try to simplify the state management by _removing_ the
`Op::ConfigureSession` enum variant and constructing the `Session` as
part of `Codex::spawn()` so that it can be passed to `submission_loop()`
as `Arc<Session>`. The original logic from the `Op::ConfigureSession`
has largely been moved to the new `Session::new()` constructor.
---
Incidentally, I also noticed that the handling of `Op::ConfigureSession`
can result in events being dispatched in addition to
`EventMsg::SessionConfigured`, as an `EventMsg::Error` is created for
every MCP initialization error, so it is important to preserve that
behavior:
https://github.com/openai/codex/blob/f968a1327ad39a7786759ea8f1d1c088fe41e91b/codex-rs/core/src/codex.rs#L901-L916
Though admittedly, I believe this does not play nice with #2264, as
these error messages will likely be dispatched before the client has a
chance to call `addConversationListener`, so we likely need to make it
so `newConversation` automatically creates the subscription, but we must
also guarantee that the "ack" from `newConversation` is returned before
any other conversation-related notifications are sent so the client
knows what `conversation_id` to match on.
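A condensed, self-contained sketch of the resulting shape, with stand-in
types rather than the real codex-rs definitions:
```rust
use std::sync::Arc;

struct Config;
struct Session;

impl Session {
    // Stand-in for the new `Session::new()` constructor that absorbed the
    // old `Op::ConfigureSession` logic.
    fn new(_config: Arc<Config>) -> Session {
        Session
    }
}

struct Codex {
    session: Arc<Session>,
}

impl Codex {
    fn spawn(config: Arc<Config>) -> Codex {
        // The Session is constructed before the loop starts, so the loop
        // holds `Arc<Session>` instead of `Option<Arc<Session>>`.
        let session = Arc::new(Session::new(config));
        std::thread::spawn({
            let session = Arc::clone(&session);
            move || submission_loop(session)
        });
        Codex { session }
    }
}

fn submission_loop(_sess: Arc<Session>) {
    // `sess` is guaranteed to exist for the lifetime of the loop; no
    // `send_no_session_event`-style fallbacks are needed.
}

fn main() {
    let _codex = Codex::spawn(Arc::new(Config));
}
```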
2025-08-14 09:55:28 -07:00
Op::ExecApproval { id, decision } => match decision {
    ReviewDecision::Abort => {
2025-09-18 18:21:52 +01:00
        sess.interrupt_task().await;
    }
2025-09-18 18:21:52 +01:00
    other => sess.notify_approval(&id, other).await,
},
Op::PatchApproval { id, decision } => match decision {
    ReviewDecision::Abort => {
2025-09-18 18:21:52 +01:00
        sess.interrupt_task().await;
    }
2025-09-18 18:21:52 +01:00
    other => sess.notify_approval(&id, other).await,
},
feat: record messages from user in ~/.codex/history.jsonl (#939)
This is a large change to support a "history" feature like you would
expect in a shell like Bash.
History events are recorded in `$CODEX_HOME/history.jsonl`. Because it
is a JSONL file, it is straightforward to append new entries (as opposed
to the TypeScript file that uses `$CODEX_HOME/history.json`, so to be
valid JSON, each new entry entails rewriting the entire file). Because
it is possible for there to be multiple instances of Codex CLI writing
to `history.jsonl` at once, we use advisory file locking when working
with `history.jsonl` in `codex-rs/core/src/message_history.rs`.
Because we believe history is a sufficiently useful feature, we enable
it by default. Though to provide some safety, we set the file
permissions of `history.jsonl` to be `o600` so that other users on the
system cannot read the user's history. We do not yet support a default
list of `SENSITIVE_PATTERNS` as the TypeScript CLI does:
https://github.com/openai/codex/blob/3fdf9df1335ac9501e3fb0e61715359145711e8b/codex-cli/src/utils/storage/command-history.ts#L10-L17
We are going to take a more conservative approach to this list in the
Rust CLI. For example, while `/\b[A-Za-z0-9-_]{20,}\b/` might exclude
sensitive information like API tokens, it would also exclude valuable
information such as references to Git commits.
As noted in the updated documentation, users can opt-out of history by
adding the following to `config.toml`:
```toml
[history]
persistence = "none"
```
Because `history.jsonl` could, in theory, be quite large, we take a[n
arguably overly pedantic] approach in reading history entries into
memory. Specifically, we start by telling the client the current number
of entries in the history file (`history_entry_count`) as well as the
inode (`history_log_id`) of `history.jsonl` (see the new fields on
`SessionConfiguredEvent`).
The client is responsible for keeping new entries in memory to create a
"local history," but if the user hits up enough times to go "past" the
end of local history, then the client should use the new
`GetHistoryEntryRequest` in the protocol to fetch older entries.
Specifically, it should pass the `history_log_id` it was given
originally and work backwards from `history_entry_count`. (It should
really fetch history in batches rather than one-at-a-time, but that is
something we can improve upon in subsequent PRs.)
The motivation behind this crazy scheme is that it is designed to defend
against:
* The `history.jsonl` being truncated during the session such that the
index into the history is no longer consistent with what had been read
up to that point. We do not yet have logic to enforce a `max_bytes` for
`history.jsonl`, but once we do, we will aspire to implement it in a way
that should result in a new inode for the file on most systems.
* New items from concurrent Codex CLI sessions appending to the history.
Because, in absence of truncation, `history.jsonl` is an append-only
log, so long as the client reads backwards from `history_entry_count`,
it should always get a consistent view of history. (That said, it will
not be able to read _new_ commands from concurrent sessions, but perhaps
we will introduce a `/` command to reload latest history or something
down the road.)
Admittedly, my testing of this feature thus far has been fairly light. I
expect we will find bugs and introduce enhancements/fixes going forward.
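A minimal sketch of the locked-append idea, assuming the `fs2` crate for
advisory locking and the Unix-only `mode(0o600)` open option; the real logic
lives in `codex-rs/core/src/message_history.rs` and differs in detail:
```rust
use std::fs::OpenOptions;
use std::io::Write;
use std::os::unix::fs::OpenOptionsExt; // Unix-only: set permissions at open time
use std::path::Path;

use fs2::FileExt; // advisory file locking (assumed dependency)

fn append_history_entry(path: &Path, json_line: &str) -> std::io::Result<()> {
    let mut file = OpenOptions::new()
        .create(true)
        .append(true)
        .mode(0o600) // other users on the system cannot read the history
        .open(path)?;
    // Take an advisory exclusive lock so concurrent Codex CLI instances do
    // not interleave partial lines; it is released when `file` is dropped.
    file.lock_exclusive()?;
    writeln!(file, "{json_line}")
}
```
Note that the advisory lock only coordinates cooperating Codex processes; it
does not stop other programs from reading the file, which is why the `o600`
permissions matter.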
2025-05-15 16:26:23 -07:00
Op::AddToHistory { text } => {
2025-09-07 20:22:25 -07:00
    let id = sess.conversation_id;
    let config = config.clone();
    tokio::spawn(async move {
        if let Err(e) =
            crate::message_history::append_entry(&text, &id, &config).await
        {
2025-06-26 14:40:42 -04:00
            warn!("failed to append to message history: {e}");
        }
    });
}
Op::GetHistoryEntryRequest { offset, log_id } => {
    let config = config.clone();
2025-09-09 16:52:33 -07:00
    let sess_clone = sess.clone();
    let sub_id = sub.id.clone();
    tokio::spawn(async move {
        // Run lookup in blocking thread because it does file IO + locking.
        let entry_opt = tokio::task::spawn_blocking(move || {
            crate::message_history::lookup(log_id, offset, &config)
        })
        .await
        .unwrap_or(None);
        let event = Event {
            id: sub_id,
            msg: EventMsg::GetHistoryEntryResponse(
                crate::protocol::GetHistoryEntryResponseEvent {
                    offset,
                    log_id,
2025-08-15 12:44:40 -07:00
                    entry: entry_opt.map(|e| {
                        codex_protocol::message_history::HistoryEntry {
2025-09-07 20:22:25 -07:00
                            conversation_id: e.session_id,
2025-08-15 12:44:40 -07:00
                            ts: e.ts,
                            text: e.text,
                        }
                    }),
                },
            ),
        };
2025-09-09 16:52:33 -07:00
        sess_clone.send_event(event).await;
    });
}
2025-08-19 09:00:31 -07:00
Op::ListMcpTools => {
    let sub_id = sub.id.clone();
    // This is a cheap lookup from the connection manager's cache.
ref: full state refactor (#4174)
## Current State Observations
- `Session` currently holds many unrelated responsibilities (history,
approval queues, task handles, rollout recorder, shell discovery, token
tracking, etc.), making it hard to reason about ownership and lifetimes.
- The anonymous `State` struct inside `codex.rs` mixes session-long data
with turn-scoped queues and approval bookkeeping.
- Turn execution (`run_task`) relies on ad-hoc local variables that
should conceptually belong to a per-turn state object.
- External modules (`codex::compact`, tests) frequently poke the raw
`Session.state` mutex, which couples them to implementation details.
- Interrupts, approvals, and rollout persistence all have bespoke
cleanup paths, contributing to subtle bugs when a turn is aborted
mid-flight.
## Desired End State
- Keep a slim `Session` object that acts as the orchestrator and façade.
It should expose a focused API (submit, approvals, interrupts, event
emission) without storing unrelated fields directly.
- Introduce a `state` module that encapsulates all mutable data
structures:
- `SessionState`: session-persistent data (history, approved commands,
token/rate-limit info, maybe user preferences).
- `ActiveTurn`: metadata for the currently running turn (sub-id, task
kind, abort handle) and an `Arc<TurnState>`.
- `TurnState`: all turn-scoped pieces (pending inputs, approval waiters,
diff tracker, review history, auto-compact flags, last agent message,
outstanding tool call bookkeeping).
- Group long-lived helpers/managers into a dedicated `SessionServices`
struct so `Session` does not accumulate "random" fields.
- Provide clear, lock-safe APIs so other modules never touch raw
mutexes.
- Ensure every turn creates/drops a `TurnState` and that
interrupts/finishes delegate cleanup to it.
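A rough sketch of that target layout; field lists are elided, and any name
not in the bullets above is an assumption:
```rust
mod state {
    use std::sync::Arc;

    /// Session-persistent data: history, approved commands,
    /// token/rate-limit info.
    pub struct SessionState {
        // ...
    }

    /// Metadata for the currently running turn.
    pub struct ActiveTurn {
        pub sub_id: String,
        pub turn_state: Arc<TurnState>,
        // abort handle, task kind, ...
    }

    /// Turn-scoped pieces: pending inputs, approval waiters, diff tracker,
    /// auto-compact flags, last agent message, tool-call bookkeeping.
    pub struct TurnState {
        // ...
    }
}

/// Long-lived helpers/managers grouped together so `Session` stays slim.
struct SessionServices {
    // mcp_connection_manager, rollout recorder, shell discovery, ...
}
```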
2025-09-25 11:16:06 +01:00
    let tools = sess.services.mcp_connection_manager.list_all_tools();
2025-08-19 09:00:31 -07:00
    let event = Event {
        id: sub_id,
        msg: EventMsg::McpListToolsResponse(
            crate::protocol::McpListToolsResponseEvent { tools },
        ),
    };
2025-09-09 16:52:33 -07:00
    sess.send_event(event).await;
2025-08-19 09:00:31 -07:00
}
2025-08-28 19:16:39 -07:00
Op::ListCustomPrompts => {
    let sub_id = sub.id.clone();
    let custom_prompts: Vec<CustomPrompt> =
        if let Some(dir) = crate::custom_prompts::default_prompts_dir() {
            crate::custom_prompts::discover_prompts_in(&dir).await
        } else {
            Vec::new()
        };
    let event = Event {
        id: sub_id,
        msg: EventMsg::ListCustomPromptsResponse(ListCustomPromptsResponseEvent {
            custom_prompts,
        }),
    };
2025-09-09 16:52:33 -07:00
    sess.send_event(event).await;
2025-08-28 19:16:39 -07:00
}
2025-07-31 21:34:32 -07:00
Op::Compact => {
    // Attempt to inject input into current task
2025-09-18 18:21:52 +01:00
    if let Err(items) = sess
        .inject_input(vec![InputItem::Text {
2025-09-23 17:59:17 +01:00
            text: compact::SUMMARIZATION_PROMPT.to_string(),
2025-09-18 18:21:52 +01:00
        }])
        .await
    {
2025-09-26 15:49:08 +02:00
        sess.spawn_task(Arc::clone(&turn_context), sub.id, items, CompactTask)
            .await;
2025-07-31 21:34:32 -07:00
    }
}
2025-07-23 15:03:26 -07:00
Op::Shutdown => {
2025-09-26 15:49:08 +02:00
    sess.abort_all_tasks(TurnAbortReason::Interrupted).await;
2025-07-23 15:03:26 -07:00
    info!("Shutting down Codex instance");
    // Gracefully flush and shutdown rollout recorder on session end so tests
    // that inspect the rollout file do not race with the background writer.
2025-09-18 18:21:52 +01:00
    let recorder_opt = {
        let mut guard = sess.services.rollout.lock().await;
2025-09-18 18:21:52 +01:00
        guard.take()
    };
2025-08-19 13:22:02 -07:00
    if let Some(rec) = recorder_opt
        && let Err(e) = rec.shutdown().await
    {
        warn!("failed to shutdown rollout recorder: {e}");
        let event = Event {
            id: sub.id.clone(),
            msg: EventMsg::Error(ErrorEvent {
                message: "Failed to shutdown rollout recorder".to_string(),
            }),
        };
2025-09-09 16:52:33 -07:00
        sess.send_event(event).await;
2025-07-23 15:03:26 -07:00
    }
2025-07-23 15:03:26 -07:00
    let event = Event {
        id: sub.id.clone(),
        msg: EventMsg::ShutdownComplete,
    };
2025-09-09 16:52:33 -07:00
    sess.send_event(event).await;
2025-07-23 15:03:26 -07:00
    break;
}
2025-09-10 17:42:54 -07:00
Op::GetPath => {
2025-08-22 17:06:09 -07:00
    let sub_id = sub.id.clone();
2025-09-10 17:42:54 -07:00
    // Flush rollout writes before returning the path so readers observe a consistent file.
    let (path, rec_opt) = {
ref: full state refactor (#4174)
## Current State Observations
- `Session` currently holds many unrelated responsibilities (history,
approval queues, task handles, rollout recorder, shell discovery, token
tracking, etc.), making it hard to reason about ownership and lifetimes.
- The anonymous `State` struct inside `codex.rs` mixes session-long data
with turn-scoped queues and approval bookkeeping.
- Turn execution (`run_task`) relies on ad-hoc local variables that
should conceptually belong to a per-turn state object.
- External modules (`codex::compact`, tests) frequently poke the raw
`Session.state` mutex, which couples them to implementation details.
- Interrupts, approvals, and rollout persistence all have bespoke
cleanup paths, contributing to subtle bugs when a turn is aborted
mid-flight.
## Desired End State
- Keep a slim `Session` object that acts as the orchestrator and façade.
It should expose a focused API (submit, approvals, interrupts, event
emission) without storing unrelated fields directly.
- Introduce a `state` module that encapsulates all mutable data
structures:
- `SessionState`: session-persistent data (history, approved commands,
token/rate-limit info, maybe user preferences).
- `ActiveTurn`: metadata for the currently running turn (sub-id, task
kind, abort handle) and an `Arc<TurnState>`.
- `TurnState`: all turn-scoped pieces (pending inputs, approval waiters,
diff tracker, review history, auto-compact flags, last agent message,
outstanding tool call bookkeeping).
- Group long-lived helpers/managers into a dedicated `SessionServices`
struct so `Session` does not accumulate "random" fields.
- Provide clear, lock-safe APIs so other modules never touch raw
mutexes.
- Ensure every turn creates/drops a `TurnState` and that
interrupts/finishes delegate cleanup to it.
2025-09-25 11:16:06 +01:00
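A rough sketch of the proposed split; the names mirror the plan above, but every field shown is a placeholder rather than the final definition:
```rust
use std::collections::{HashSet, VecDeque};
use std::sync::Arc;

// Session-persistent data (placeholder fields).
struct SessionState {
    history: Vec<String>,
    approved_commands: HashSet<String>,
    token_usage: u64,
}

// Turn-scoped pieces, created when a turn starts and dropped when it ends.
struct TurnState {
    pending_input: VecDeque<String>,
    auto_compact_attempted: bool,
    last_agent_message: Option<String>,
}

// Metadata for the currently running turn.
struct ActiveTurn {
    sub_id: String,
    turn_state: Arc<TurnState>,
}

// Long-lived helpers (rollout recorder, shell discovery, ...) grouped so
// Session itself stays slim.
struct SessionServices;

// The slim orchestrator/facade.
struct Session {
    state: SessionState,
    active_turn: Option<ActiveTurn>,
    services: SessionServices,
}
```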
        let guard = sess.services.rollout.lock().await;
        match guard.as_ref() {
            Some(rec) => (rec.get_rollout_path(), Some(rec.clone())),
            None => {
                error!("rollout recorder not found");
                continue;
            }
        }
    };
    if let Some(rec) = rec_opt
        && let Err(e) = rec.flush().await
    {
        warn!("failed to flush rollout recorder before GetPath: {e}");
    }
    let event = Event {
        id: sub_id.clone(),
        msg: EventMsg::ConversationPath(ConversationPathResponseEvent {
            conversation_id: sess.conversation_id,
            path,
        }),
    };
    sess.send_event(event).await;
}
Review Mode (Core) (#3401)
## 📝 Review Mode -- Core
This PR introduces the Core implementation for Review mode:
- New op `Op::Review { prompt: String }`: spawns a child review task
with isolated context, a review-specific system prompt, and a
`Config.review_model`.
- `EnteredReviewMode`: emitted when the child review session starts.
Every event from this point onwards reflects the review session.
- `ExitedReviewMode(Option<ReviewOutputEvent>)`: emitted when the review
finishes or is interrupted, with optional structured findings:
```json
{
"findings": [
{
"title": "<≤ 80 chars, imperative>",
"body": "<valid Markdown explaining *why* this is a problem; cite files/lines/functions>",
"confidence_score": <float 0.0-1.0>,
"priority": <int 0-3>,
"code_location": {
"absolute_file_path": "<file path>",
"line_range": {"start": <int>, "end": <int>}
}
}
],
"overall_correctness": "patch is correct" | "patch is incorrect",
"overall_explanation": "<1-3 sentence explanation justifying the overall_correctness verdict>",
"overall_confidence_score": <float 0.0-1.0>
}
```
## Questions
### Why separate out its own message history?
We want the review thread to match the training of our review models as
much as possible -- that means using a custom prompt, removing user
instructions, and starting a clean chat history.
We also want to make sure the review thread doesn't leak into the parent
thread.
### Why do this as a mode, vs. sub-agents?
1. We want review to be a synchronous task, so it's fine for now to do a
bespoke implementation.
2. We're still unclear about the final structure for sub-agents. We'd
prefer to land this quickly and then refactor into sub-agents without
rushing that implementation.
2025-09-12 16:25:10 -07:00
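For reference, the findings schema above maps naturally onto serde types along these lines (an illustrative sketch assuming the `serde` derive feature; the actual `ReviewOutputEvent` definition may differ):
```rust
use serde::{Deserialize, Serialize};

// Illustrative mirror of the review findings JSON above.
#[derive(Serialize, Deserialize)]
struct LineRange {
    start: u32,
    end: u32,
}

#[derive(Serialize, Deserialize)]
struct CodeLocation {
    absolute_file_path: String,
    line_range: LineRange,
}

#[derive(Serialize, Deserialize)]
struct ReviewFinding {
    title: String,         // <= 80 chars, imperative
    body: String,          // Markdown explaining *why* this is a problem
    confidence_score: f32, // 0.0-1.0
    priority: u8,          // 0-3
    code_location: CodeLocation,
}

#[derive(Serialize, Deserialize)]
struct ReviewOutput {
    findings: Vec<ReviewFinding>,
    overall_correctness: String, // "patch is correct" | "patch is incorrect"
    overall_explanation: String,
    overall_confidence_score: f32,
}
```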
Op::Review { review_request } => {
    spawn_review_thread(
        sess.clone(),
        config.clone(),
        turn_context.clone(),
        sub.id,
        review_request,
    )
    .await;
}
_ => {
    // Ignore unknown ops; enum is non_exhaustive to allow extensions.
}
}
}
debug!("Agent loop exited");
}
/// Spawn a review thread using the given prompt.
async fn spawn_review_thread(
    sess: Arc<Session>,
    config: Arc<Config>,
    parent_turn_context: Arc<TurnContext>,
    sub_id: String,
    review_request: ReviewRequest,
) {
    let model = config.review_model.clone();
    let review_model_family = find_family_for_model(&model)
        .unwrap_or_else(|| parent_turn_context.client.get_model_family());
    let tools_config = ToolsConfig::new(&ToolsConfigParams {
        model_family: &review_model_family,
        include_plan_tool: false,
        include_apply_patch_tool: config.include_apply_patch_tool,
        include_web_search_request: false,
        use_streamable_shell_tool: false,
        include_view_image_tool: false,
        experimental_unified_exec_tool: config.use_experimental_unified_exec_tool,
    });
    let base_instructions = REVIEW_PROMPT.to_string();
    let review_prompt = review_request.prompt.clone();
    let provider = parent_turn_context.client.get_provider();
    let auth_manager = parent_turn_context.client.get_auth_manager();
    let model_family = review_model_family.clone();
    // Build a per-turn client with the requested model/family.
    let mut per_turn_config = (*config).clone();
    per_turn_config.model = model.clone();
    per_turn_config.model_family = model_family.clone();
    per_turn_config.model_reasoning_effort = Some(ReasoningEffortConfig::Low);
    per_turn_config.model_reasoning_summary = ReasoningSummaryConfig::Detailed;
    if let Some(model_info) = get_model_info(&model_family) {
        per_turn_config.model_context_window = Some(model_info.context_window);
    }
### Choosing an exporter
Set `otel.exporter` to control where events go:
- `none` – leaves instrumentation active but skips exporting. This is the
  default.
- `otlp-http` – posts OTLP log records to an OTLP/HTTP collector. Specify the
  endpoint, protocol, and headers your collector expects:
```toml
[otel]
exporter = { otlp-http = {
  endpoint = "https://otel.example.com/v1/logs",
  protocol = "binary",
  headers = { "x-otlp-api-key" = "${OTLP_TOKEN}" }
}}
```
- `otlp-grpc` – streams OTLP log records over gRPC. Provide the endpoint and
  any metadata headers:
```toml
[otel]
exporter = { otlp-grpc = {
  endpoint = "https://otel.example.com:4317",
  headers = { "x-otlp-meta" = "abc123" }
}}
```
If the exporter is `none`, nothing is written anywhere; otherwise you must run
or point to your own collector. All exporters run on a background batch worker
that is flushed on shutdown.
If you build Codex from source, the OTEL crate is still behind an `otel`
feature flag; the official prebuilt binaries ship with the feature enabled.
When the feature is disabled, the telemetry hooks become no-ops so the CLI
continues to function without the extra dependencies.
---------
Co-authored-by: Anton Panasenko <apanasenko@openai.com>
2025-09-29 19:30:55 +01:00
    let otel_event_manager = parent_turn_context
        .client
        .get_otel_event_manager()
        .with_model(
            per_turn_config.model.as_str(),
            per_turn_config.model_family.slug.as_str(),
        );
    let per_turn_config = Arc::new(per_turn_config);
    let client = ModelClient::new(
        per_turn_config.clone(),
        auth_manager,
        otel_event_manager,
        provider,
        per_turn_config.model_reasoning_effort,
        per_turn_config.model_reasoning_summary,
        sess.conversation_id,
    );
    let review_turn_context = TurnContext {
        client,
        tools_config,
        user_instructions: None,
        base_instructions: Some(base_instructions.clone()),
        approval_policy: parent_turn_context.approval_policy,
        sandbox_policy: parent_turn_context.sandbox_policy.clone(),
        shell_environment_policy: parent_turn_context.shell_environment_policy.clone(),
        cwd: parent_turn_context.cwd.clone(),
        is_review_mode: true,
        final_output_json_schema: None,
    };
    // Seed the child task with the review prompt as the initial user message.
    let input: Vec<InputItem> = vec![InputItem::Text {
        text: format!("{base_instructions}\n\n---\n\nNow, here's your task: {review_prompt}"),
    }];
    let tc = Arc::new(review_turn_context);
    // Clone sub_id for the upcoming announcement before moving it into the task.
    let sub_id_for_event = sub_id.clone();
    sess.spawn_task(tc.clone(), sub_id, input, ReviewTask).await;
    // Announce entering review mode so UIs can switch modes.
    sess.send_event(Event {
        id: sub_id_for_event,
        msg: EventMsg::EnteredReviewMode(review_request),
    })
    .await;
}
fix: chat completions API now also passes tools along (#1167)
Prior to this PR, there were two big misses in `chat_completions.rs`:
1. The loop in `stream_chat_completions()` was only including items of
type `ResponseItem::Message` when building up the `"messages"` JSON for
the `POST` request to the `chat/completions` endpoint. This fixes things
by ensuring other variants (`FunctionCall`, `LocalShellCall`, and
`FunctionCallOutput`) are included, as well.
2. In `process_chat_sse()`, we were not recording tool calls and were
only emitting items of type
`ResponseEvent::OutputItemDone(ResponseItem::Message)` to the stream.
Now we introduce `FunctionCallState`, which is used to accumulate the
`delta`s of type `tool_calls`, so we can ultimately emit a
`ResponseItem::FunctionCall`, when appropriate.
While function calling now appears to work for chat completions with my
local testing, I believe that there are still edge cases that are not
covered and that this codepath would benefit from a battery of
integration tests. (As part of that further cleanup, we should also work
to support streaming responses in the UI.)
The other important part of this PR is some cleanup in
`core/src/codex.rs`. In particular, it was hard to reason about how
`run_task()` was building up the list of messages to include in a
request across the various cases:
- Responses API
- Chat Completions API
- Responses API used in concert with ZDR
I like to think things are a bit cleaner now where:
- `zdr_transcript` (if present) contains all messages in the history of
the conversation, which includes function call outputs that have not
been sent back to the model yet
- `pending_input` includes any messages the user has submitted while the
turn is in flight that need to be injected as part of the next `POST` to
the model
- `input_for_next_turn` includes the tool call outputs that have not
been sent back to the model yet
2025-06-02 13:47:51 -07:00
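The accumulation idea can be sketched as follows; the struct name matches the PR text, but the fields and methods here are illustrative rather than the actual `chat_completions.rs` implementation:
```rust
// Illustrative accumulator for streamed `tool_calls` deltas; the real
// FunctionCallState may differ.
#[derive(Default)]
struct FunctionCallState {
    name: Option<String>,
    arguments: String, // JSON arguments arrive as string fragments
    call_id: Option<String>,
}

impl FunctionCallState {
    // Each SSE chunk may carry a partial name, id, or argument fragment;
    // name and id are kept from the first chunk that supplies them.
    fn apply_delta(&mut self, name: Option<&str>, id: Option<&str>, args_fragment: &str) {
        if let Some(name) = name {
            self.name.get_or_insert_with(|| name.to_string());
        }
        if let Some(id) = id {
            self.call_id.get_or_insert_with(|| id.to_string());
        }
        self.arguments.push_str(args_fragment);
    }

    // Once the stream signals the tool call is complete, the accumulated
    // pieces become a single function-call item (name, arguments, call id).
    fn finish(self) -> Option<(String, String, String)> {
        Some((self.name?, self.arguments, self.call_id?))
    }
}
```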
/// Takes a user message as input and runs a loop where, at each turn, the model
/// replies with either:
///
/// - requested function calls
/// - an assistant message
///
/// While it is possible for the model to return multiple of these items in a
/// single turn, in practice, we generally see one item per turn:
///
/// - If the model requests a function call, we execute it and send the output
/// back to the model in the next turn.
/// - If the model sends only an assistant message, we record it in the
/// conversation history and consider the task complete.
///
/// Review mode: when `turn_context.is_review_mode` is true, the turn runs in an
/// isolated in-memory thread without the parent session's prior history or
/// user_instructions. Emits `ExitedReviewMode` when the final review message arrives.
pub(crate) async fn run_task(
    sess: Arc<Session>,
    turn_context: Arc<TurnContext>,
    sub_id: String,
    input: Vec<InputItem>,
) -> Option<String> {
    if input.is_empty() {
        return None;
    }
    let event = Event {
        id: sub_id.clone(),
        msg: EventMsg::TaskStarted(TaskStartedEvent {
            model_context_window: turn_context.client.get_model_context_window(),
        }),
    };
    sess.send_event(event).await;
    let initial_input_for_turn: ResponseInputItem = ResponseInputItem::from(input);
    // For review threads, keep an isolated in-memory history so the
    // model sees a fresh conversation without the parent session's history.
    // For normal turns, continue recording to the session history as before.
    let is_review_mode = turn_context.is_review_mode;
    let mut review_thread_history: Vec<ResponseItem> = Vec::new();
    if is_review_mode {
        // Seed review threads with environment context so the model knows the working directory.
        review_thread_history.extend(sess.build_initial_context(turn_context.as_ref()));
        review_thread_history.push(initial_input_for_turn.into());
    } else {
        sess.record_input_and_rollout_usermsg(&initial_input_for_turn)
            .await;
    }
    let mut last_agent_message: Option<String> = None;
    // Although from the perspective of codex.rs, TurnDiffTracker has the lifecycle of a
    // Task, which contains many turns, from the perspective of the user it is a single turn.
    let mut turn_diff_tracker = TurnDiffTracker::new();
    let mut auto_compact_recently_attempted = false;
    loop {
        // Note that pending_input would be something like a message the user
        // submitted through the UI while the model was running. Though the UI
        // may support this, the model might not.
        let pending_input = sess
            .get_pending_input()
            .await
            .into_iter()
            .map(ResponseItem::from)
            .collect::<Vec<ResponseItem>>();
Review Mode (Core) (#3401)
## 📝 Review Mode -- Core
This PR introduces the Core implementation for Review mode:
- New op `Op::Review { prompt: String }`: spawns a child review task
with isolated context, a review‑specific system prompt, and a
`Config.review_model`.
- `EnteredReviewMode`: emitted when the child review session starts.
Every event from this point onwards reflects the review session.
- `ExitedReviewMode(Option<ReviewOutputEvent>)`: emitted when the review
finishes or is interrupted, with optional structured findings:
```json
{
"findings": [
{
"title": "<≤ 80 chars, imperative>",
"body": "<valid Markdown explaining *why* this is a problem; cite files/lines/functions>",
"confidence_score": <float 0.0-1.0>,
"priority": <int 0-3>,
"code_location": {
"absolute_file_path": "<file path>",
"line_range": {"start": <int>, "end": <int>}
}
}
],
"overall_correctness": "patch is correct" | "patch is incorrect",
"overall_explanation": "<1-3 sentence explanation justifying the overall_correctness verdict>",
"overall_confidence_score": <float 0.0-1.0>
}
```
## Questions
### Why separate out its own message history?
We want the review thread to match the training of our review models as
much as possible -- that means using a custom prompt, removing user
instructions, and starting a clean chat history.
We also want to make sure the review thread doesn't leak into the parent
thread.
### Why do this as a mode, vs. sub-agents?
1. We want review to be a synchronous task, so it's fine for now to do a
bespoke implementation.
2. We're still unclear about the final structure for sub-agents. We'd
prefer to land this quickly and then refactor into sub-agents without
rushing that implementation.
2025-09-12 16:25:10 -07:00
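For orientation, the JSON above maps directly onto serde-derived types. A minimal sketch, assuming the field names from the schema and defaulted optionals; the real definitions live in the protocol crate and may differ:
```rust
use serde::{Deserialize, Serialize};

#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ReviewOutputEvent {
    #[serde(default)]
    pub findings: Vec<ReviewFinding>,
    #[serde(default)]
    pub overall_correctness: String,
    #[serde(default)]
    pub overall_explanation: String,
    #[serde(default)]
    pub overall_confidence_score: f32,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReviewFinding {
    pub title: String,
    pub body: String,
    pub confidence_score: f32,
    pub priority: i32,
    pub code_location: CodeLocation,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CodeLocation {
    pub absolute_file_path: String,
    pub line_range: LineRange,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LineRange {
    pub start: i32,
    pub end: i32,
}
```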
        // Construct the input that we will send to the model.
        //
        // - For review threads, use the isolated in-memory history so the
        //   model sees a fresh conversation (no parent history/user_instructions).
        //
        // - For normal turns, use the session's full history. When using the
        //   chat completions API (or ZDR clients), the model needs the full
        //   conversation history on each turn. The rollout file, however, should
        //   only record the new items that originated in this turn so that it
        //   represents an append-only log without duplicates.
        let turn_input: Vec<ResponseItem> = if is_review_mode {
            if !pending_input.is_empty() {
                review_thread_history.extend(pending_input);
            }
            review_thread_history.clone()
        } else {
            sess.record_conversation_items(&pending_input).await;
            sess.turn_input_with_history(pending_input).await
        };
feat: configurable notifications in the Rust CLI (#793)
With this change, you can specify a program that will be executed to get
notified about events generated by Codex. The notification info will be
packaged as a JSON object. The supported notification types are defined
by the `UserNotification` enum introduced in this PR. Initially, it
contains only one variant, `AgentTurnComplete`:
```rust
pub(crate) enum UserNotification {
    #[serde(rename_all = "kebab-case")]
    AgentTurnComplete {
        turn_id: String,
        /// Messages that the user sent to the agent to initiate the turn.
        input_messages: Vec<String>,
        /// The last message sent by the assistant in the turn.
        last_assistant_message: Option<String>,
    },
}
```
This is intended to support the common case when a "turn" ends, which
often means it is now your chance to give Codex further instructions.
For example, I have the following in my `~/.codex/config.toml`:
```toml
notify = ["python3", "/Users/mbolin/.codex/notify.py"]
```
I created my own custom notifier script that calls out to
[terminal-notifier](https://github.com/julienXX/terminal-notifier) to
show a desktop push notification on macOS. Contents of `notify.py`:
```python
#!/usr/bin/env python3

import json
import subprocess
import sys


def main() -> int:
    if len(sys.argv) != 2:
        print("Usage: notify.py <NOTIFICATION_JSON>")
        return 1

    try:
        notification = json.loads(sys.argv[1])
    except json.JSONDecodeError:
        return 1

    match notification_type := notification.get("type"):
        case "agent-turn-complete":
            assistant_message = notification.get("last-assistant-message")
            if assistant_message:
                title = f"Codex: {assistant_message}"
            else:
                title = "Codex: Turn Complete!"
            # Note: kebab-case key, matching the serde renaming on the variant.
            input_messages = notification.get("input-messages", [])
            message = " ".join(input_messages)
            title += message
        case _:
            print(f"not sending a push notification for: {notification_type}")
            return 0

    subprocess.check_output(
        [
            "terminal-notifier",
            "-title",
            title,
            "-message",
            message,
            "-group",
            "codex",
            "-ignoreDnD",
            "-activate",
            "com.googlecode.iterm2",
        ]
    )

    return 0


if __name__ == "__main__":
    sys.exit(main())
```
For reference, here are related PRs that tried to add this functionality
to the TypeScript version of the Codex CLI:
* https://github.com/openai/codex/pull/160
* https://github.com/openai/codex/pull/498
2025-05-02 19:48:13 -07:00
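Given the kebab-case renaming on the variant, the JSON handed to the notify program as its single argument would look roughly like this (values are illustrative):
```json
{
  "type": "agent-turn-complete",
  "turn-id": "turn-123",
  "input-messages": ["Fix the failing unit test"],
  "last-assistant-message": "Done. All tests pass."
}
```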
        let turn_input_messages: Vec<String> = turn_input
            .iter()
            .filter_map(|item| match item {
                ResponseItem::Message { content, .. } => Some(content),
                _ => None,
            })
            .flat_map(|content| {
                content.iter().filter_map(|item| match item {
                    ContentItem::OutputText { text } => Some(text.clone()),
                    _ => None,
                })
            })
            .collect();
        match run_turn(
            &sess,
            turn_context.as_ref(),
            &mut turn_diff_tracker,
            sub_id.clone(),
            turn_input,
        )
        .await
        {
            Ok(turn_output) => {
                let TurnRunResult {
                    processed_items,
                    total_token_usage,
                } = turn_output;
                let limit = turn_context
                    .client
                    .get_auto_compact_token_limit()
                    .unwrap_or(i64::MAX);
                let total_usage_tokens = total_token_usage
                    .as_ref()
                    .map(TokenUsage::tokens_in_context_window);
                let token_limit_reached = total_usage_tokens
                    .map(|tokens| (tokens as i64) >= limit)
                    .unwrap_or(false);
                let mut items_to_record_in_conversation_history = Vec::<ResponseItem>::new();
                let mut responses = Vec::<ResponseInputItem>::new();
                for processed_response_item in processed_items {
                    let ProcessedResponseItem { item, response } = processed_response_item;
                    match (&item, &response) {
                        (ResponseItem::Message { role, .. }, None) if role == "assistant" => {
                            // If the model returned a message, we need to record it.
                            items_to_record_in_conversation_history.push(item);
                        }
                        (
                            ResponseItem::LocalShellCall { .. },
                            Some(ResponseInputItem::FunctionCallOutput { call_id, output }),
                        ) => {
                            items_to_record_in_conversation_history.push(item);
                            items_to_record_in_conversation_history.push(
                                ResponseItem::FunctionCallOutput {
                                    call_id: call_id.clone(),
                                    output: output.clone(),
                                },
                            );
                        }
                        (
                            ResponseItem::FunctionCall { .. },
                            Some(ResponseInputItem::FunctionCallOutput { call_id, output }),
                        ) => {
                            items_to_record_in_conversation_history.push(item);
                            items_to_record_in_conversation_history.push(
                                ResponseItem::FunctionCallOutput {
                                    call_id: call_id.clone(),
                                    output: output.clone(),
                                },
                            );
                        }
                        (
                            ResponseItem::CustomToolCall { .. },
                            Some(ResponseInputItem::CustomToolCallOutput { call_id, output }),
                        ) => {
                            items_to_record_in_conversation_history.push(item);
                            items_to_record_in_conversation_history.push(
                                ResponseItem::CustomToolCallOutput {
                                    call_id: call_id.clone(),
                                    output: output.clone(),
                                },
                            );
                        }
                        (
                            ResponseItem::FunctionCall { .. },
                            Some(ResponseInputItem::McpToolCallOutput { call_id, result }),
                        ) => {
                            items_to_record_in_conversation_history.push(item);
                            let output = match result {
                                Ok(call_tool_result) => {
                                    convert_call_tool_result_to_function_call_output_payload(
                                        call_tool_result,
                                    )
                                }
                                Err(err) => FunctionCallOutputPayload {
                                    content: err.clone(),
                                    success: Some(false),
                                },
                            };
                            items_to_record_in_conversation_history.push(
                                ResponseItem::FunctionCallOutput {
                                    call_id: call_id.clone(),
                                    output,
                                },
                            );
                        }
                        (
                            ResponseItem::Reasoning {
                                id,
                                summary,
                                content,
                                encrypted_content,
                            },
                            None,
                        ) => {
                            items_to_record_in_conversation_history.push(ResponseItem::Reasoning {
                                id: id.clone(),
                                summary: summary.clone(),
                                content: content.clone(),
                                encrypted_content: encrypted_content.clone(),
                            });
                        }
                        _ => {
                            warn!("Unexpected response item: {item:?} with response: {response:?}");
                        }
                    };
                    if let Some(response) = response {
                        responses.push(response);
                    }
                }
                // Only attempt to take the lock if there is something to record.
                if !items_to_record_in_conversation_history.is_empty() {
                    if is_review_mode {
                        review_thread_history
                            .extend(items_to_record_in_conversation_history.clone());
                    } else {
                        sess.record_conversation_items(&items_to_record_in_conversation_history)
                            .await;
                    }
                }
                if token_limit_reached {
                    if auto_compact_recently_attempted {
                        let limit_str = limit.to_string();
                        let current_tokens = total_usage_tokens
                            .map(|tokens| tokens.to_string())
                            .unwrap_or_else(|| "unknown".to_string());
                        let event = Event {
                            id: sub_id.clone(),
                            msg: EventMsg::Error(ErrorEvent {
                                message: format!(
                                    "Conversation is still above the token limit after automatic summarization (limit {limit_str}, current {current_tokens}). Please start a new session or trim your input."
                                ),
                            }),
                        };
                        sess.send_event(event).await;
                        break;
                    }
                    auto_compact_recently_attempted = true;
                    compact::run_inline_auto_compact_task(sess.clone(), turn_context.clone()).await;
                    continue;
                }
                auto_compact_recently_attempted = false;
                if responses.is_empty() {
                    last_agent_message = get_last_assistant_message_from_turn(
                        &items_to_record_in_conversation_history,
                    );
                    sess.notifier()
                        .notify(&UserNotification::AgentTurnComplete {
                            turn_id: sub_id.clone(),
                            input_messages: turn_input_messages,
                            last_assistant_message: last_agent_message.clone(),
                        });
                    break;
                }
                continue;
            }
            Err(e) => {
                info!("Turn error: {e:#}");
                let event = Event {
                    id: sub_id.clone(),
                    msg: EventMsg::Error(ErrorEvent {
                        message: e.to_string(),
                    }),
                };
                sess.send_event(event).await;
                // let the user continue the conversation
                break;
            }
        }
    }
    // If this was a review thread and we have a final assistant message,
    // try to parse it as a ReviewOutputEvent.
    //
    // If parsing fails, construct a minimal ReviewOutputEvent that carries the
    // plain text as the overall explanation; if there is no final message,
    // exit review mode with None.
    //
    // Either way, this emits an ExitedReviewMode event with the optional
    // parsed review output.
    if turn_context.is_review_mode {
        exit_review_mode(
            sess.clone(),
            sub_id.clone(),
            last_agent_message.as_deref().map(parse_review_output_event),
        )
        .await;
    }
    last_agent_message
}
/// Parse the review output; when not valid JSON, build a structured
/// fallback that carries the plain text as the overall explanation.
///
/// Returns: a ReviewOutputEvent parsed from JSON or a fallback populated from text.
fn parse_review_output_event(text: &str) -> ReviewOutputEvent {
    // Try a direct parse first.
    if let Ok(ev) = serde_json::from_str::<ReviewOutputEvent>(text) {
        return ev;
    }
    // If wrapped in markdown fences or extra prose, attempt to extract the
    // first JSON object.
    if let (Some(start), Some(end)) = (text.find('{'), text.rfind('}'))
        && start < end
        && let Some(slice) = text.get(start..=end)
        && let Ok(ev) = serde_json::from_str::<ReviewOutputEvent>(slice)
    {
        return ev;
    }
    // Not JSON: return a structured ReviewOutputEvent that carries the plain
    // text as the overall explanation.
    ReviewOutputEvent {
        overall_explanation: text.to_string(),
        ..Default::default()
    }
}
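A quick illustration of the three paths above, with hypothetical inputs; this assumes `ReviewOutputEvent`'s remaining fields are optional/defaulted, as in the sketch shown earlier:
```rust
// Well-formed JSON parses directly.
let json = r#"{"overall_explanation":"LGTM","overall_correctness":"patch is correct"}"#;
assert_eq!(parse_review_output_event(json).overall_explanation, "LGTM");

// JSON embedded in surrounding prose is extracted and parsed.
let wrapped = "Here is my review: {\"overall_explanation\":\"off-by-one in loop bound\"} Thanks!";
assert_eq!(
    parse_review_output_event(wrapped).overall_explanation,
    "off-by-one in loop bound"
);

// Plain text falls back to a structured event carrying the text itself.
let plain = "The patch looks fine overall.";
assert_eq!(parse_review_output_event(plain).overall_explanation, plain);
```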
async fn run_turn(
    sess: &Session,
    turn_context: &TurnContext,
    turn_diff_tracker: &mut TurnDiffTracker,
    sub_id: String,
    input: Vec<ResponseItem>,
) -> CodexResult<TurnRunResult> {
    let tools = get_openai_tools(
        &turn_context.tools_config,
ref: full state refactor (#4174)
## Current State Observations
- `Session` currently holds many unrelated responsibilities (history,
approval queues, task handles, rollout recorder, shell discovery, token
tracking, etc.), making it hard to reason about ownership and lifetimes.
- The anonymous `State` struct inside `codex.rs` mixes session-long data
with turn-scoped queues and approval bookkeeping.
- Turn execution (`run_task`) relies on ad-hoc local variables that
should conceptually belong to a per-turn state object.
- External modules (`codex::compact`, tests) frequently poke the raw
`Session.state` mutex, which couples them to implementation details.
- Interrupts, approvals, and rollout persistence all have bespoke
cleanup paths, contributing to subtle bugs when a turn is aborted
mid-flight.
## Desired End State
- Keep a slim `Session` object that acts as the orchestrator and façade.
It should expose a focused API (submit, approvals, interrupts, event
emission) without storing unrelated fields directly.
- Introduce a `state` module that encapsulates all mutable data
structures:
- `SessionState`: session-persistent data (history, approved commands,
token/rate-limit info, maybe user preferences).
- `ActiveTurn`: metadata for the currently running turn (sub-id, task
kind, abort handle) and an `Arc<TurnState>`.
- `TurnState`: all turn-scoped pieces (pending inputs, approval waiters,
diff tracker, review history, auto-compact flags, last agent message,
outstanding tool call bookkeeping).
- Group long-lived helpers/managers into a dedicated `SessionServices`
struct so `Session` does not accumulate "random" fields.
- Provide clear, lock-safe APIs so other modules never touch raw
mutexes.
- Ensure every turn creates/drops a `TurnState` and that
interrupts/finishes delegate cleanup to it.
2025-09-25 11:16:06 +01:00
        Some(sess.services.mcp_connection_manager.list_all_tools()),
    );
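The state refactor described above can be pictured as a handful of small structs. Everything below is a sketch under stated assumptions: the struct names come from the PR, but the field choices are illustrative, the approval decision type is simplified to `bool`, and the referenced item/usage types are the ones already used in this file.
```rust
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use tokio::sync::{oneshot, Mutex};

/// Session-persistent data (sketch).
pub(crate) struct SessionState {
    history: Vec<ResponseItem>,
    approved_commands: HashSet<Vec<String>>,
    token_usage: Option<TokenUsage>,
}

/// Metadata for the currently running turn (sketch).
pub(crate) struct ActiveTurn {
    sub_id: String,
    abort_handle: tokio::task::AbortHandle,
    turn_state: Arc<TurnState>,
}

/// Turn-scoped bookkeeping that is created and dropped with each turn (sketch).
#[derive(Default)]
pub(crate) struct TurnState {
    pending_input: Mutex<Vec<ResponseInputItem>>,
    // Approval waiters keyed by call id; decision type simplified to bool here.
    pending_approvals: Mutex<HashMap<String, oneshot::Sender<bool>>>,
    last_agent_message: Mutex<Option<String>>,
}
```
Long-lived helpers (e.g. the `mcp_connection_manager` referenced just above, the notifier, the rollout recorder) would then be grouped into a `SessionServices` struct so `Session` itself stays a slim façade.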
    let prompt = Prompt {
        input,
        tools,
        base_instructions_override: turn_context.base_instructions.clone(),
        output_schema: turn_context.final_output_json_schema.clone(),
    };
    let mut retries = 0;
    loop {
        match try_run_turn(sess, turn_context, turn_diff_tracker, &sub_id, &prompt).await {
            Ok(output) => return Ok(output),
            Err(CodexErr::Interrupted) => return Err(CodexErr::Interrupted),
            Err(CodexErr::EnvVar(var)) => return Err(CodexErr::EnvVar(var)),
            Err(CodexErr::UsageLimitReached(e)) => {
                let rate_limits = e.rate_limits.clone();
                if let Some(rate_limits) = rate_limits {
                    sess.update_rate_limits(&sub_id, rate_limits).await;
                }
                return Err(CodexErr::UsageLimitReached(e));
            }
            Err(CodexErr::UsageNotIncluded) => return Err(CodexErr::UsageNotIncluded),
            Err(e) => {
                // Use the configured provider-specific stream retry budget.
                let max_retries = turn_context.client.get_provider().stream_max_retries();
                if retries < max_retries {
                    retries += 1;
                    let delay = match e {
                        CodexErr::Stream(_, Some(delay)) => delay,
                        _ => backoff(retries),
                    };
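                    // For reference, a minimal sketch of what a `backoff`
                    // helper like the one used above could look like
                    // (hypothetical constants; the real implementation lives
                    // elsewhere in this crate and may add jitter or use
                    // different bounds):
                    //
                    //     fn backoff(attempt: u64) -> std::time::Duration {
                    //         // Exponential: 200ms, 400ms, 800ms, ... capped at 10s.
                    //         let millis = 200u64.saturating_mul(1u64 << attempt.min(15));
                    //         std::time::Duration::from_millis(millis.min(10_000))
                    //     }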
                    warn!(
                        "stream disconnected - retrying turn ({retries}/{max_retries} in {delay:?})...",
                    );
                    // Surface retry information to any UI/front-end so the
                    // user understands what is happening instead of staring
                    // at a seemingly frozen screen.
                    sess.notify_stream_error(
                        &sub_id,
                        format!(
                            "stream error: {e}; retrying {retries}/{max_retries} in {delay:?}…"
                        ),
                    )
                    .await;
                    tokio::time::sleep(delay).await;
                } else {
                    return Err(e);
                }
            }
        }
    }
}
/// When the model is prompted, it returns a stream of events. Some of these
/// events map to a `ResponseItem`. A `ResponseItem` may need to be
/// "handled" such that it produces a `ResponseInputItem` that needs to be
/// sent back to the model on the next turn.
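/// For example, a `ResponseItem::FunctionCall` is typically paired with a
/// `ResponseInputItem::FunctionCallOutput` carrying the tool's result, while a
/// plain assistant message needs no reply and is recorded with `response: None`.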
#[derive(Debug)]
struct ProcessedResponseItem {
    item: ResponseItem,
    response: Option<ResponseInputItem>,
}
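
/// Aggregate result of a single turn: every processed stream item plus the
/// total token usage reported by the final `Completed` event.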
#[derive(Debug)]
struct TurnRunResult {
    processed_items: Vec<ProcessedResponseItem>,
    total_token_usage: Option<TokenUsage>,
}
async fn try_run_turn(
    sess: &Session,
    turn_context: &TurnContext,
    turn_diff_tracker: &mut TurnDiffTracker,
    sub_id: &str,
    prompt: &Prompt,
) -> CodexResult<TurnRunResult> {
    // call_ids that are part of this response.
    let completed_call_ids = prompt
        .input
        .iter()
        .filter_map(|ri| match ri {
            ResponseItem::FunctionCallOutput { call_id, .. } => Some(call_id),
            ResponseItem::LocalShellCall {
                call_id: Some(call_id),
                ..
            } => Some(call_id),
            ResponseItem::CustomToolCallOutput { call_id, .. } => Some(call_id),
            _ => None,
        })
        .collect::<Vec<_>>();

    // call_ids that were pending but are not part of this response. This
    // usually happens because the user interrupted the model before we
    // responded to one of its tool calls and then sent a follow-up message.
    let missing_calls = {
        prompt
            .input
            .iter()
            .filter_map(|ri| match ri {
                ResponseItem::FunctionCall { call_id, .. } => Some(call_id),
                ResponseItem::LocalShellCall {
                    call_id: Some(call_id),
                    ..
                } => Some(call_id),
                ResponseItem::CustomToolCall { call_id, .. } => Some(call_id),
                _ => None,
            })
            .filter_map(|call_id| {
                if completed_call_ids.contains(&call_id) {
                    None
                } else {
                    Some(call_id.clone())
                }
            })
            .map(|call_id| ResponseItem::CustomToolCallOutput {
                call_id,
                output: "aborted".to_string(),
            })
            .collect::<Vec<_>>()
    };
    let prompt: Cow<Prompt> = if missing_calls.is_empty() {
        Cow::Borrowed(prompt)
    } else {
        // Add the synthetic aborted missing calls to the beginning of the
        // input to ensure all call ids have responses.
        let input = [missing_calls, prompt.input.clone()].concat();
        Cow::Owned(Prompt {
            input,
            ..prompt.clone()
        })
    };
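    // Worked example (hypothetical id): if the previous turn left a dangling
    // `FunctionCall { call_id: "c1", .. }` that was never answered, the input
    // for this turn becomes
    //
    //     [CustomToolCallOutput { call_id: "c1", output: "aborted" }, <original input...>]
    //
    // so the model never sees a tool call without a matching output. Borrowing
    // via `Cow` avoids cloning the prompt in the common case where nothing
    // needs to be synthesized.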
    let rollout_item = RolloutItem::TurnContext(TurnContextItem {
        cwd: turn_context.cwd.clone(),
        approval_policy: turn_context.approval_policy,
        sandbox_policy: turn_context.sandbox_policy.clone(),
        model: turn_context.client.get_model(),
        effort: turn_context.client.get_reasoning_effort(),
        summary: turn_context.client.get_reasoning_summary(),
    });
    sess.persist_rollout_items(&[rollout_item]).await;

    let mut stream = turn_context.client.clone().stream(&prompt).await?;
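    // Typical event sequence for one successful turn (illustrative):
    //
    //     Created → (OutputTextDelta | ReasoningSummaryDelta)* →
    //     OutputItemDone* → Completed { token_usage }
    //
    // The loop below drains the stream until `Completed` (success) or until
    // the stream errors out or closes early (a retryable failure).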
    let mut output = Vec::new();
    loop {
        // Poll the next item from the model stream. We must inspect *both* the
        // Ok and Err cases so that transient stream failures (e.g., a dropped
        // SSE connection before `response.completed`) bubble up and trigger
        // the caller's retry logic.
        let event = stream.next().await;
        let Some(event) = event else {
            // Channel closed without yielding a final Completed event or an
            // explicit error. Treat this as a disconnected stream so the
            // caller can retry.
            return Err(CodexErr::Stream(
                "stream closed before response.completed".into(),
                None,
            ));
        };
        let event = match event {
            Ok(ev) => ev,
            Err(e) => {
                // Propagate the underlying stream error to the caller
                // (run_turn), which will apply the configured
                // `stream_max_retries` policy.
                return Err(e);
            }
        };
        match event {
            ResponseEvent::Created => {}
            ResponseEvent::OutputItemDone(item) => {
                let response = handle_response_item(
                    sess,
                    turn_context,
                    turn_diff_tracker,
                    sub_id,
                    item.clone(),
                )
                .await?;
                output.push(ProcessedResponseItem { item, response });
            }
            ResponseEvent::WebSearchCallBegin { call_id } => {
                let _ = sess
                    .tx_event
                    .send(Event {
                        id: sub_id.to_string(),
                        msg: EventMsg::WebSearchBegin(WebSearchBeginEvent { call_id }),
                    })
                    .await;
            }
            ResponseEvent::RateLimits(snapshot) => {
                // Update internal state with the latest rate limits, but defer
                // sending until token usage is available to avoid duplicate
                // TokenCount events.
                sess.update_rate_limits(sub_id, snapshot).await;
            }
            ResponseEvent::Completed {
                response_id: _,
                token_usage,
            } => {
                sess.update_token_usage_info(sub_id, turn_context, token_usage.as_ref())
                    .await;
                let unified_diff = turn_diff_tracker.get_unified_diff();
                if let Ok(Some(unified_diff)) = unified_diff {
                    let msg = EventMsg::TurnDiff(TurnDiffEvent { unified_diff });
                    let event = Event {
                        id: sub_id.to_string(),
                        msg,
                    };
                    sess.send_event(event).await;
                }

                let result = TurnRunResult {
                    processed_items: output,
                    total_token_usage: token_usage.clone(),
                };
                return Ok(result);
            }
            ResponseEvent::OutputTextDelta(delta) => {
                // In review child threads, suppress assistant text deltas; the
                // UI will show a selection popup from the final ReviewOutput.
                if !turn_context.is_review_mode {
                    let event = Event {
                        id: sub_id.to_string(),
                        msg: EventMsg::AgentMessageDelta(AgentMessageDeltaEvent { delta }),
                    };
                    sess.send_event(event).await;
                } else {
                    trace!("suppressing OutputTextDelta in review mode");
                }
            }
            ResponseEvent::ReasoningSummaryDelta(delta) => {
                let event = Event {
                    id: sub_id.to_string(),
                    msg: EventMsg::AgentReasoningDelta(AgentReasoningDeltaEvent { delta }),
                };
                sess.send_event(event).await;
            }
            ResponseEvent::ReasoningSummaryPartAdded => {
                let event = Event {
                    id: sub_id.to_string(),
                    msg: EventMsg::AgentReasoningSectionBreak(AgentReasoningSectionBreakEvent {}),
                };
                sess.send_event(event).await;
            }
            ResponseEvent::ReasoningContentDelta(delta) => {
                if sess.show_raw_agent_reasoning() {
                    let event = Event {
                        id: sub_id.to_string(),
                        msg: EventMsg::AgentReasoningRawContentDelta(
                            AgentReasoningRawContentDeltaEvent { delta },
                        ),
                    };
                    sess.send_event(event).await;
                }
            }
        }
    }
}
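
/// Handles a single completed `ResponseItem` from the model stream: tool and
/// shell calls are executed and mapped to a `ResponseInputItem` to send back
/// on the next turn, while items that need no reply produce `None`.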
async fn handle_response_item(
    sess: &Session,
    turn_context: &TurnContext,
    turn_diff_tracker: &mut TurnDiffTracker,
    sub_id: &str,
    item: ResponseItem,
) -> CodexResult<Option<ResponseInputItem>> {
    debug!(?item, "Output item");
    let output = match item {
        ResponseItem::FunctionCall {
            name,
            arguments,
            call_id,
            ..
        } => {
            info!("FunctionCall: {name}({arguments})");
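            // MCP tools arrive under fully-qualified names; `parse_tool_name`
            // splits one into its (server, tool) pair. Names that do not parse
            // as MCP tools fall through to the built-in function-call handler
            // below.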
            if let Some((server, tool_name)) =
                sess.services.mcp_connection_manager.parse_tool_name(&name)
            {
                let resp = handle_mcp_tool_call(
                    sess,
                    sub_id,
                    call_id.clone(),
                    server,
                    tool_name,
                    arguments,
                )
                .await;
                Some(resp)
            } else {
                let result = turn_context
                    .client
                    .get_otel_event_manager()
                    .log_tool_result(name.as_str(), call_id.as_str(), arguments.as_str(), || {
                        handle_function_call(
                            sess,
                            turn_context,
                            turn_diff_tracker,
                            sub_id.to_string(),
                            name.to_owned(),
                            arguments.to_owned(),
                            call_id.clone(),
                        )
                    })
                    .await;
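                // `log_tool_result` wraps the actual tool execution so the
                // telemetry layer can time the call and record whether it
                // succeeded, alongside the tool's name, call id, and arguments.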
                let output = match result {
                    Ok(content) => FunctionCallOutputPayload {
                        content,
                        success: Some(true),
                    },
                    Err(FunctionCallError::RespondToModel(msg)) => FunctionCallOutputPayload {
                        content: msg,
                        success: Some(false),
                    },
                };
                Some(ResponseInputItem::FunctionCallOutput { call_id, output })
            }
        }
        ResponseItem::LocalShellCall {
            id,
            call_id,
            status: _,
            action,
        } => {
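            // The shell call's id can arrive either as `call_id` or as the
            // item-level `id`; prefer `call_id`, fall back to `id`, and fail
            // the call if neither is present.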
            let name = "local_shell";
            let LocalShellAction::Exec(action) = action;
            tracing::info!("LocalShellCall: {action:?}");
            let params = ShellToolCallParams {
                command: action.command,
                workdir: action.working_directory,
                timeout_ms: action.timeout_ms,
                with_escalated_permissions: None,
                justification: None,
            };
            let effective_call_id = match (call_id, id) {
                (Some(call_id), _) => call_id,
                (None, Some(id)) => id,
                (None, None) => {
                    let error_message = "LocalShellCall without call_id or id";
                    turn_context
                        .client
                        .get_otel_event_manager()
                        .log_tool_failed(name, error_message);
                    error!(error_message);
                    return Ok(Some(ResponseInputItem::FunctionCallOutput {
                        call_id: "".to_string(),
                        output: FunctionCallOutputPayload {
                            content: error_message.to_string(),
                            success: None,
                        },
                    }));
                }
            };
            let exec_params = to_exec_params(params, turn_context);

            {
                let result = turn_context
                    .client
                    .get_otel_event_manager()
                    .log_tool_result(
                        name,
                        effective_call_id.as_str(),
                        exec_params.command.join(" ").as_str(),
                        || {
                            handle_container_exec_with_params(
                                name,
                                exec_params,
                                sess,
                                turn_context,
                                turn_diff_tracker,
                                sub_id.to_string(),
                                effective_call_id.clone(),
                            )
                        },
                    )
                    .await;
                let output = match result {
                    Ok(content) => FunctionCallOutputPayload {
                        content,
                        success: Some(true),
                    },
                    Err(FunctionCallError::RespondToModel(msg)) => FunctionCallOutputPayload {
                        content: msg,
                        success: Some(false),
                    },
                };
                Some(ResponseInputItem::FunctionCallOutput {
                    call_id: effective_call_id,
                    output,
                })
            }
        }
        ResponseItem::CustomToolCall {
            id: _,
            call_id,
            name,
            input,
            status: _,
        } => {
            let result = turn_context
                .client
                .get_otel_event_manager()
                .log_tool_result(name.as_str(), call_id.as_str(), input.as_str(), || {
                    handle_custom_tool_call(
                        sess,
                        turn_context,
                        turn_diff_tracker,
                        sub_id.to_string(),
                        name.to_owned(),
                        input.to_owned(),
                        call_id.clone(),
                    )
                })
                .await;
            let output = match result {
                Ok(content) => content,
                Err(FunctionCallError::RespondToModel(msg)) => msg,
            };
            Some(ResponseInputItem::CustomToolCallOutput { call_id, output })
        }
        ResponseItem::FunctionCallOutput { .. } => {
            debug!("unexpected FunctionCallOutput from stream");
            None
        }
        ResponseItem::CustomToolCallOutput { .. } => {
            debug!("unexpected CustomToolCallOutput from stream");
            None
        }
        ResponseItem::Message { .. }
        | ResponseItem::Reasoning { .. }
        | ResponseItem::WebSearchCall { .. } => {
            // Review mode (#3401): `Op::Review { prompt }` spawns a child review
            // task with isolated context, a review-specific system prompt, and
            // `Config.review_model`. `EnteredReviewMode` / `ExitedReviewMode`
            // bracket the child session, and `ExitedReviewMode` may carry a
            // structured `ReviewOutputEvent` (findings with title, body,
            // confidence score, priority, and code location, plus an overall
            // correctness verdict). The review thread keeps its own message
            // history so it never leaks into the parent thread.
            // In review child threads, suppress assistant message events but
            // keep reasoning/web search.
            let msgs = match &item {
                ResponseItem::Message { .. } if turn_context.is_review_mode => {
                    trace!("suppressing assistant Message in review mode");
                    Vec::new()
                }
                _ => map_response_item_to_event_messages(&item, sess.show_raw_agent_reasoning()),
            };

            for msg in msgs {
                let event = Event {
                    id: sub_id.to_string(),
                    msg,
                };
                sess.send_event(event).await;
            }
            None
        }
        ResponseItem::Other => None,
    };
    Ok(output)
}
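
The `log_tool_result` calls above wrap every tool invocation so a `codex.tool_result` event can record `duration_ms` and `success` alongside the tool's output (see the event catalog in the OTEL notes). The real implementation lives in the `codex_otel` crate and is not shown here; the following is a minimal sketch under that assumption, with stub types and a `println!` standing in for the exporter:

```rust
use std::future::Future;
use std::time::Instant;

// Stub types for the sketch; the real definitions live elsewhere in the crate.
pub enum FunctionCallError {
    RespondToModel(String),
}

pub struct OtelEventManager;

impl OtelEventManager {
    /// Sketch only: time the tool future, emit a `codex.tool_result`-style
    /// record with `duration_ms` and `success`, and pass the result through.
    pub async fn log_tool_result<F, Fut>(
        &self,
        tool_name: &str,
        call_id: &str,
        arguments: &str,
        f: F,
    ) -> Result<String, FunctionCallError>
    where
        F: FnOnce() -> Fut,
        Fut: Future<Output = Result<String, FunctionCallError>>,
    {
        let start = Instant::now();
        let result = f().await;
        let duration_ms = start.elapsed().as_millis();
        let success = result.is_ok();
        // Placeholder for the real exporter hook.
        println!(
            "codex.tool_result tool_name={tool_name} call_id={call_id} \
             arguments={arguments} duration_ms={duration_ms} success={success}"
        );
        result
    }
}
```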
/// Unified PTY-based exec tool ("unified_exec", #3288), gated behind the
/// `use_experimental_unified_exec_tool = true` config flag. Provides an
/// interactive, PTY-backed exec with session reuse via `session_id`, bounded
/// output (128 KiB), and timeout clamping (<= 60 s). Commands are resolved via
/// PATH (or explicit paths), with UTF-8/newline-aware truncation
/// (`truncate_middle`). Exposed as a function tool on the Responses API;
/// excluded from the Chat Completions payload while still supported in tool
/// lists.
async fn handle_unified_exec_tool_call(
    sess: &Session,
    session_id: Option<String>,
    arguments: Vec<String>,
    timeout_ms: Option<u64>,
) -> Result<String, FunctionCallError> {
    let parsed_session_id = if let Some(session_id) = session_id {
        match session_id.parse::<i32>() {
            Ok(parsed) => Some(parsed),
            Err(output) => {
                return Err(FunctionCallError::RespondToModel(format!(
                    "invalid session_id: {session_id} due to error {output:?}"
                )));
            }
        }
    } else {
        None
    };
    let request = crate::unified_exec::UnifiedExecRequest {
        session_id: parsed_session_id,
        input_chunks: &arguments,
        timeout_ms,
    };
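
    // `SessionServices` (introduced by the state refactor, #4174) groups
    // long-lived helpers such as the unified exec manager, so `Session` itself
    // stays a slim orchestrator facade.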
    let value = sess
        .services
        .unified_exec_manager
        .handle_request(request)
        .await
        .map_err(|err| {
            FunctionCallError::RespondToModel(format!("unified exec failed: {err:?}"))
        })?;
    #[derive(Serialize)]
    struct SerializedUnifiedExecResult {
        session_id: Option<String>,
        output: String,
    }

    serde_json::to_string(&SerializedUnifiedExecResult {
        session_id: value.session_id.map(|id| id.to_string()),
        output: value.output,
    })
    .map_err(|err| {
        FunctionCallError::RespondToModel(format!(
            "failed to serialize unified exec output: {err:?}"
        ))
    })
}
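
For reference, the payload handed back to the model serializes like this; a standalone sketch of the step above, with the struct duplicated locally (an open session id, when present, is echoed back as a string):

```rust
use serde::Serialize;

#[derive(Serialize)]
struct SerializedUnifiedExecResult {
    session_id: Option<String>,
    output: String,
}

fn main() -> serde_json::Result<()> {
    let payload = SerializedUnifiedExecResult {
        session_id: Some(7.to_string()),
        output: "hello\n".to_string(),
    };
    // Prints: {"session_id":"7","output":"hello\n"}
    println!("{}", serde_json::to_string(&payload)?);
    Ok(())
}
```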

async fn handle_function_call(
    sess: &Session,
    turn_context: &TurnContext,
    turn_diff_tracker: &mut TurnDiffTracker,
    sub_id: String,
    name: String,
    arguments: String,
    call_id: String,
) -> Result<String, FunctionCallError> {
    match name.as_str() {
        "container.exec" | "shell" => {
            let params = parse_container_exec_arguments(arguments, turn_context, &call_id)?;
            handle_container_exec_with_params(
                name.as_str(),
                params,
                sess,
                turn_context,
                turn_diff_tracker,
                sub_id,
                call_id,
            )
            .await
        }
" unified_exec " = > {
#[ derive(Deserialize) ]
struct UnifiedExecArgs {
input : Vec < String > ,
#[ serde(default) ]
session_id : Option < String > ,
#[ serde(default) ]
timeout_ms : Option < u64 > ,
}
            let args: UnifiedExecArgs = serde_json::from_str(&arguments).map_err(|err| {
                FunctionCallError::RespondToModel(format!(
                    "failed to parse function arguments: {err:?}"
                ))
            })?;
            handle_unified_exec_tool_call(sess, args.session_id, args.input, args.timeout_ms).await
        }
" view_image " = > {
#[ derive(serde::Deserialize) ]
struct SeeImageArgs {
path : String ,
}
2025-09-24 10:27:35 -07:00
let args : SeeImageArgs = serde_json ::from_str ( & arguments ) . map_err ( | e | {
FunctionCallError ::RespondToModel ( format! (
" failed to parse function arguments: {e:?} "
) )
} ) ? ;
2025-08-27 17:41:23 -07:00
let abs = turn_context . resolve_path ( Some ( args . path ) ) ;
2025-09-24 10:27:35 -07:00
sess . inject_input ( vec! [ InputItem ::LocalImage { path : abs } ] )
2025-09-18 18:21:52 +01:00
. await
2025-09-24 10:27:35 -07:00
. map_err ( | _ | {
FunctionCallError ::RespondToModel (
" unable to attach image (no active task) " . to_string ( ) ,
)
} ) ? ;
Ok ( " attached local image path " . to_string ( ) )
2025-08-27 17:41:23 -07:00
}
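
        // Illustrative call (hypothetical path): arguments such as
        // {"path":"docs/screenshot.png"} resolve against the turn cwd and are
        // injected as a pending LocalImage input item.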
" apply_patch " = > {
2025-09-24 10:27:35 -07:00
let args : ApplyPatchToolArgs = serde_json ::from_str ( & arguments ) . map_err ( | e | {
FunctionCallError ::RespondToModel ( format! (
" failed to parse function arguments: {e:?} "
) )
} ) ? ;
2025-08-15 11:55:53 -04:00
let exec_params = ExecParams {
command : vec ! [ " apply_patch " . to_string ( ) , args . input . clone ( ) ] ,
2025-08-15 09:40:02 -07:00
cwd : turn_context . cwd . clone ( ) ,
2025-08-15 11:55:53 -04:00
timeout_ms : None ,
env : HashMap ::new ( ) ,
with_escalated_permissions : None ,
justification : None ,
} ;
2025-08-15 09:40:02 -07:00
handle_container_exec_with_params (
                name.as_str(),
                exec_params,
                sess,
                turn_context,
                turn_diff_tracker,
                sub_id,
                call_id,
            )
            .await
        }
        "update_plan" => handle_update_plan(sess, arguments, sub_id, call_id).await,
        EXEC_COMMAND_TOOL_NAME => {
            // TODO(mbolin): Sandbox check.
            let exec_params: ExecCommandParams = serde_json::from_str(&arguments).map_err(|e| {
                FunctionCallError::RespondToModel(format!(
                    "failed to parse function arguments: {e:?}"
                ))
            })?;
            let result = sess
                .services
                .session_manager
                .handle_exec_command_request(exec_params)
                .await;
            match result {
                Ok(output) => Ok(output.to_text_output()),
                Err(err) => Err(FunctionCallError::RespondToModel(err)),
            }
        }
        WRITE_STDIN_TOOL_NAME => {
            let write_stdin_params =
                serde_json::from_str::<WriteStdinParams>(&arguments).map_err(|e| {
                    FunctionCallError::RespondToModel(format!(
                        "failed to parse function arguments: {e:?}"
                    ))
                })?;
            let result = sess
                .services
                .session_manager
                .handle_write_stdin_request(write_stdin_params)
                .await
                .map_err(FunctionCallError::RespondToModel)?;
            Ok(result.to_text_output())
        }
        _ => Err(FunctionCallError::RespondToModel(format!(
            "unsupported call: {name}"
        ))),
    }
}
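
Each arm above deserializes a different JSON payload out of `arguments`. A rough sketch of the shapes involved, with field names mirroring the `#[derive(Deserialize)]` structs in this file (the values themselves are hypothetical):

```rust
fn main() {
    // (tool name, example arguments JSON) pairs; values are illustrative only.
    let examples = [
        ("shell", r#"{"command":["ls","-la"],"workdir":"/tmp","timeout_ms":1000}"#),
        ("unified_exec", r#"{"input":["python3"],"session_id":"3","timeout_ms":5000}"#),
        ("view_image", r#"{"path":"./diagram.png"}"#),
        ("apply_patch", r#"{"input":"<raw patch text>"}"#),
    ];
    for (tool, args) in examples {
        println!("{tool} <- {args}");
    }
}
```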
async fn handle_custom_tool_call(
    sess: &Session,
    turn_context: &TurnContext,
    turn_diff_tracker: &mut TurnDiffTracker,
    sub_id: String,
    name: String,
    input: String,
    call_id: String,
) -> Result<String, FunctionCallError> {
    info!("CustomToolCall: {name} {input}");
    match name.as_str() {
        "apply_patch" => {
            let exec_params = ExecParams {
                command: vec!["apply_patch".to_string(), input.clone()],
                cwd: turn_context.cwd.clone(),
                timeout_ms: None,
                env: HashMap::new(),
                with_escalated_permissions: None,
                justification: None,
            };
            handle_container_exec_with_params(
                name.as_str(),
                exec_params,
                sess,
                turn_context,
                turn_diff_tracker,
                sub_id,
                call_id,
            )
            .await
        }
        _ => {
            debug!("unexpected CustomToolCall from stream");
            Err(FunctionCallError::RespondToModel(format!(
                "unsupported custom tool call: {name}"
            )))
        }
    }
}
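
For orientation, a sketch of the one custom tool call this handler accepts. The patch envelope below is an assumption about the `apply_patch` input format, not something defined in this file:

```rust
fn main() {
    // Hypothetical custom tool call: name = "apply_patch", input = raw patch text.
    let input = "*** Begin Patch\n*** Update File: src/lib.rs\n@@\n-old line\n+new line\n*** End Patch";
    // handle_custom_tool_call wraps this as ["apply_patch", <input>] and runs it
    // through the same exec path as the shell tool.
    let command = vec!["apply_patch".to_string(), input.to_string()];
    println!("{command:?}");
}
```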

fn to_exec_params(params: ShellToolCallParams, turn_context: &TurnContext) -> ExecParams {
    ExecParams {
        command: params.command,
        cwd: turn_context.resolve_path(params.workdir.clone()),
        timeout_ms: params.timeout_ms,
        env: create_env(&turn_context.shell_environment_policy),
        with_escalated_permissions: params.with_escalated_permissions,
        justification: params.justification,
    }
}

fn parse_container_exec_arguments(
    arguments: String,
    turn_context: &TurnContext,
    _call_id: &str,
) -> Result<ExecParams, FunctionCallError> {
    serde_json::from_str::<ShellToolCallParams>(&arguments)
        .map(|p| to_exec_params(p, turn_context))
        .map_err(|e| {
            FunctionCallError::RespondToModel(format!("failed to parse function arguments: {e:?}"))
        })
}
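
A minimal, self-contained check of the parsing behavior above; this local `ShellToolCallParams` is a simplified stand-in for the real struct (the actual definition also carries the escalation fields used by `to_exec_params`):

```rust
use serde::Deserialize;

// Simplified stand-in; the real ShellToolCallParams lives elsewhere in the crate.
#[derive(Deserialize, Debug)]
struct ShellToolCallParams {
    command: Vec<String>,
    #[serde(default)]
    workdir: Option<String>,
    #[serde(default)]
    timeout_ms: Option<u64>,
}

fn main() {
    // Well-formed arguments parse into params...
    let ok: Result<ShellToolCallParams, _> =
        serde_json::from_str(r#"{"command":["echo","hi"],"workdir":"/tmp"}"#);
    assert!(ok.is_ok());

    // ...while malformed JSON yields the "failed to parse function arguments"
    // error that parse_container_exec_arguments sends back to the model.
    let err: Result<ShellToolCallParams, _> = serde_json::from_str("{");
    assert!(err.is_err());
}
```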
pub struct ExecInvokeArgs < ' a > {
pub params : ExecParams ,
pub sandbox_type : SandboxType ,
pub sandbox_policy : & ' a SandboxPolicy ,
2025-09-18 14:37:06 -07:00
pub sandbox_cwd : & ' a Path ,
2025-08-06 23:25:56 -07:00
pub codex_linux_sandbox_exe : & ' a Option < PathBuf > ,
pub stdout_stream : Option < StdoutStream > ,
}
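/// Reroutes the command through the user's default shell invocation when the
/// session shell warrants it (PowerShell, or when a shell profile is in use);
/// otherwise returns the params unchanged.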
// ref: full state refactor (#4174)
//
// Current State Observations
// - `Session` currently holds many unrelated responsibilities (history,
//   approval queues, task handles, rollout recorder, shell discovery, token
//   tracking, etc.), making it hard to reason about ownership and lifetimes.
// - The anonymous `State` struct inside `codex.rs` mixes session-long data
//   with turn-scoped queues and approval bookkeeping.
// - Turn execution (`run_task`) relies on ad-hoc local variables that should
//   conceptually belong to a per-turn state object.
// - External modules (`codex::compact`, tests) frequently poke the raw
//   `Session.state` mutex, which couples them to implementation details.
// - Interrupts, approvals, and rollout persistence all have bespoke cleanup
//   paths, contributing to subtle bugs when a turn is aborted mid-flight.
//
// Desired End State
// - Keep a slim `Session` object that acts as the orchestrator and façade. It
//   should expose a focused API (submit, approvals, interrupts, event
//   emission) without storing unrelated fields directly.
// - Introduce a `state` module that encapsulates all mutable data structures:
//   - `SessionState`: session-persistent data (history, approved commands,
//     token/rate-limit info, maybe user preferences).
//   - `ActiveTurn`: metadata for the currently running turn (sub-id, task
//     kind, abort handle) and an `Arc<TurnState>`.
//   - `TurnState`: all turn-scoped pieces (pending inputs, approval waiters,
//     diff tracker, review history, auto-compact flags, last agent message,
//     outstanding tool call bookkeeping).
// - Group long-lived helpers/managers into a dedicated `SessionServices`
//   struct so `Session` does not accumulate "random" fields.
// - Provide clear, lock-safe APIs so other modules never touch raw mutexes.
// - Ensure every turn creates/drops a `TurnState` and that
//   interrupts/finishes delegate cleanup to it.
//
// (A rough sketch of this split follows the function below.)
fn maybe_translate_shell_command(
    params: ExecParams,
    sess: &Session,
    turn_context: &TurnContext,
) -> ExecParams {
    let should_translate = matches!(sess.user_shell(), crate::shell::Shell::PowerShell(_))
        || turn_context.shell_environment_policy.use_profile;

    if should_translate
        && let Some(command) = sess
            .user_shell()
            .format_default_shell_invocation(params.command.clone())
    {
        return ExecParams { command, ..params };
    }
    params
}
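// A rough sketch of the `state` module split described in the refactor note
// above `maybe_translate_shell_command`. Illustrative only: the field sets
// and helper types below are assumptions drawn from that note, not real
// definitions from this crate.
//
//     mod state {
//         /// Session-persistent data.
//         pub struct SessionState {
//             history: ConversationHistory,
//             approved_commands: HashSet<Vec<String>>,
//             token_info: Option<TokenUsageInfo>,
//         }
//
//         /// Metadata for the currently running turn.
//         pub struct ActiveTurn {
//             sub_id: String,
//             abort_handle: AbortHandle,
//             turn_state: Arc<TurnState>,
//         }
//
//         /// Turn-scoped bookkeeping: pending inputs, approval waiters, the
//         /// diff tracker, and the last agent message.
//         pub struct TurnState {
//             pending_input: Vec<InputItem>,
//             approval_waiters: HashMap<String, oneshot::Sender<ReviewDecision>>,
//             last_agent_message: Option<String>,
//         }
//     }

/// Handles a container `exec`/`shell` tool call end to end: rejects invalid
/// escalation requests, special-cases `apply_patch` invocations, and assesses
/// the safety of everything else before execution.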
async fn handle_container_exec_with_params(
    tool_name: &str,
    params: ExecParams,
    sess: &Session,
    turn_context: &TurnContext,
    turn_diff_tracker: &mut TurnDiffTracker,
    sub_id: String,
    call_id: String,
) -> Result<String, FunctionCallError> {
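    // The OTel event manager records tool telemetry for this call; when the
    // `otel` feature is compiled out, these hooks are no-ops.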
    let otel_event_manager = turn_context.client.get_otel_event_manager();

    if params.with_escalated_permissions.unwrap_or(false)
        && !matches!(turn_context.approval_policy, AskForApproval::OnRequest)
    {
        return Err(FunctionCallError::RespondToModel(format!(
            "approval policy is {policy:?}; reject command — you should not ask for escalated permissions if the approval policy is {policy:?}",
            policy = turn_context.approval_policy
        )));
    }

    // check if this was a patch, and apply it if so
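    // `apply_patch` may arrive disguised as an ordinary shell invocation;
    // detect it up front so the patch is routed through the same sandboxed
    // exec path as any other command (see the notes below).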
    // fix: ensure PatchApplyBeginEvent and PatchApplyEndEvent are dispatched
    // reliably (#1760)
    //
    // This is a follow-up to https://github.com/openai/codex/pull/1760, as
    // that PR inadvertently lost the logic where `PatchApplyBeginEvent` and
    // `PatchApplyEndEvent` events were sent when patches were auto-approved.
    // As part of this fix, this also makes an important safety fix to
    // `assess_patch_safety()`, as there was a case that returned
    // `SandboxType::None`, which arguably is the thing we were trying to
    // avoid in #1705.
    //
    // On a high level, we want there to be only one codepath where
    // `apply_patch` happens, which should be unified with the path to run
    // `exec` in general, so that sandboxing is applied consistently for both
    // cases.
    //
    // Prior to this change, `apply_patch()` in `core` would either:
    // * exit early, delegating to `exec()` to shell out to `apply_patch`
    //   using the appropriate sandbox, or
    // * proceed to run the logic for `apply_patch` in memory
    //   (https://github.com/openai/codex/blob/549846b29ad52f6cb4f8560365a731966054a9b3/codex-rs/core/src/apply_patch.rs#L61-L63).
    //
    // Only the latter would dispatch `PatchApplyBeginEvent` and
    // `PatchApplyEndEvent`, though the former would dispatch
    // `ExecCommandBeginEvent` and `ExecCommandEndEvent` for the `apply_patch`
    // call (or, more specifically, the `codex --codex-run-as-apply-patch
    // PATCH` call).
    //
    // To unify things:
    // * The back half of the `apply_patch()` function was eliminated; it now
    //   also returns `DelegateToExec`, with an extra field in the return
    //   value, `user_explicitly_approved_this_action`.
    // * Where `DelegateToExec` is processed below, `SandboxType::None` is
    //   used when `user_explicitly_approved_this_action` is `true`. This
    //   means the apply_patch logic no longer runs in memory; we always
    //   `exec()`.
    // * `notify_exec_command_begin()` and `notify_exec_command_end()` take
    //   additional fields to determine what type of notification to send:
    //   `ExecCommand` or `PatchApply`.
    //
    // Admittedly, that change also dropped some of the functionality around
    // giving the user the opportunity to expand the set of writable roots as
    // part of approving the `apply_patch` command; how that should work is
    // still being rethought as the TUI protocol is tidied up.
    let apply_patch_exec = match maybe_parse_apply_patch_verified(&params.command, &params.cwd) {
        MaybeApplyPatchVerified::Body(changes) => {
            match apply_patch::apply_patch(sess, turn_context, &sub_id, &call_id, changes).await {
                InternalApplyPatchInvocation::Output(item) => return item,
                // fix: run apply_patch calls through the sandbox (#1705)
                //
                // Building on https://github.com/openai/codex/pull/1702, this
                // changes how a shell call to `apply_patch` is handled.
                //
                // Previously, a shell call to `apply_patch` was always
                // handled in-process, never leveraging a sandbox. To
                // determine whether the `apply_patch` operation could be
                // auto-approved, the
                // `is_write_patch_constrained_to_writable_paths()` function
                // would check if all the paths listed in the patch were
                // writable. If so, the agent would apply the changes listed
                // in the patch.
                //
                // Unfortunately, this approach afforded a loophole: symlinks!
                // * For a soft link, we could fix this issue by tracing the
                //   link and checking whether the target is in the set of
                //   writable paths, however...
                // * ...for a hard link, things are not as simple. We can run
                //   `stat FILE` to see if the number of links is greater than
                //   1, but then we would have to do something potentially
                //   expensive like `find . -inum <inode_number>` to find the
                //   other paths for `FILE`. Further, even if this worked,
                //   this approach runs the risk of a TOCTOU
                //   (https://en.wikipedia.org/wiki/Time-of-check_to_time-of-use)
                //   race condition, so it is not robust.
                //
                // The solution is to turn the virtual execution of the
                // `apply_patch` CLI into an _actual_ execution using
                // `codex --codex-run-as-apply-patch PATCH`, which we can run
                // under the sandbox the user specified, just like any other
                // `shell` call.
                //
                // This, of course, assumes that the sandbox prevents writing
                // through symlinks as a mechanism to write to folders that
                // are not in the writable set configured by the sandbox.
                // Verified on both Mac and Linux with the following:
                //
                //     #!/usr/bin/env bash
                //     set -euo pipefail
                //     # Can running a command in SANDBOX_DIR write a file in
                //     # EXPLOIT_DIR? Codex is run in SANDBOX_DIR, so writes
                //     # should be constrained to this directory.
                //     SANDBOX_DIR=$(mktemp -d -p "$HOME" sandboxtesttemp.XXXXXX)
                //     # EXPLOIT_DIR is outside of SANDBOX_DIR, so let's see
                //     # if we can write to it.
                //     EXPLOIT_DIR=$(mktemp -d -p "$HOME" sandboxtesttemp.XXXXXX)
                //     echo "SANDBOX_DIR: $SANDBOX_DIR"
                //     echo "EXPLOIT_DIR: $EXPLOIT_DIR"
                //     cleanup() {
                //         # Only remove if it looks sane and still exists
                //         [[ -n "${SANDBOX_DIR:-}" && -d "$SANDBOX_DIR" ]] && rm -rf -- "$SANDBOX_DIR"
                //         [[ -n "${EXPLOIT_DIR:-}" && -d "$EXPLOIT_DIR" ]] && rm -rf -- "$EXPLOIT_DIR"
                //     }
                //     trap cleanup EXIT
                //     echo "I am the original content" > "${EXPLOIT_DIR}/original.txt"
                //     # Drop the -s to test hard links.
                //     ln -s "${EXPLOIT_DIR}/original.txt" "${SANDBOX_DIR}/link-to-original.txt"
                //     cat "${SANDBOX_DIR}/link-to-original.txt"
                //     if [[ "$(uname)" == "Linux" ]]; then
                //         SANDBOX_SUBCOMMAND=landlock
                //     else
                //         SANDBOX_SUBCOMMAND=seatbelt
                //     fi
                //     # Attempt the exploit
                //     cd "${SANDBOX_DIR}"
                //     codex debug "${SANDBOX_SUBCOMMAND}" bash -lc "echo pwned > ./link-to-original.txt" || true
                //     cat "${EXPLOIT_DIR}/original.txt"
                //
                // Admittedly, this change merits a proper integration test,
                // left to a follow-up PR.
                InternalApplyPatchInvocation::DelegateToExec(apply_patch_exec) => {
                    Some(apply_patch_exec)
                }
            }
        }
        MaybeApplyPatchVerified::CorrectnessError(parse_error) => {
            // It looks like an invocation of `apply_patch`, but we
            // could not resolve it into a patch that would apply
            // cleanly. Return to model for resample.
            return Err(FunctionCallError::RespondToModel(format!(
                "error: {parse_error:#?}"
            )));
        }
        MaybeApplyPatchVerified::ShellParseError(error) => {
            trace!("Failed to parse shell command, {error:?}");
            None
        }
        MaybeApplyPatchVerified::NotApplyPatch => None,
    };
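    // Decide how to run the command: a patch the user explicitly approved
    // runs without a sandbox, while everything else goes through the normal
    // safety assessment.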
    let (params, safety, command_for_display) = match &apply_patch_exec {
        Some(ApplyPatchExec {
            action: ApplyPatchAction { patch, cwd, .. },
            user_explicitly_approved_this_action,
        }) => {
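            // Re-invoke the current executable in apply-patch mode
            // (`CODEX_APPLY_PATCH_ARG1`, i.e. `--codex-run-as-apply-patch`
            // per the note above) so the write runs under the same sandbox
            // as any other exec.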
            let path_to_codex = std::env::current_exe()
                .ok()
                .map(|p| p.to_string_lossy().to_string());
            let Some(path_to_codex) = path_to_codex else {
                return Err(FunctionCallError::RespondToModel(
                    "failed to determine path to codex executable".to_string(),
                ));
            };
            let params = ExecParams {
                command: vec![
                    path_to_codex,
                    CODEX_APPLY_PATCH_ARG1.to_string(),
                    patch.clone(),
                ],
                cwd: cwd.clone(),
                timeout_ms: params.timeout_ms,
                env: HashMap::new(),
                with_escalated_permissions: params.with_escalated_permissions,
                justification: params.justification.clone(),
            };
            let safety = if *user_explicitly_approved_this_action {
                SafetyCheck::AutoApprove {
                    sandbox_type: SandboxType::None,
                    user_explicitly_approved: true,
                }
            } else {
                assess_safety_for_untrusted_command(
                    turn_context.approval_policy,
                    &turn_context.sandbox_policy,
                    params.with_escalated_permissions.unwrap_or(false),
                )
            };
            (
                params,
                safety,
                vec!["apply_patch".to_string(), patch.clone()],
            )
        }
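        // Not an apply_patch call: fall back to the session-level safety
        // assessment (which may consult previously approved commands held in
        // session state).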
None => {
    let safety = {
2025-09-18 18:21:52 +01:00
        let state = sess.state.lock().await;
2025-07-30 16:45:08 -07:00
        assess_command_safety(
            &params.command,
2025-08-15 09:40:02 -07:00
            turn_context.approval_policy,
            &turn_context.sandbox_policy,
ref: full state refactor (#4174)
## Current State Observations
- `Session` currently holds many unrelated responsibilities (history,
approval queues, task handles, rollout recorder, shell discovery, token
tracking, etc.), making it hard to reason about ownership and lifetimes.
- The anonymous `State` struct inside `codex.rs` mixes session-long data
with turn-scoped queues and approval bookkeeping.
- Turn execution (`run_task`) relies on ad-hoc local variables that
should conceptually belong to a per-turn state object.
- External modules (`codex::compact`, tests) frequently poke the raw
`Session.state` mutex, which couples them to implementation details.
- Interrupts, approvals, and rollout persistence all have bespoke
cleanup paths, contributing to subtle bugs when a turn is aborted
mid-flight.
## Desired End State
- Keep a slim `Session` object that acts as the orchestrator and façade.
It should expose a focused API (submit, approvals, interrupts, event
emission) without storing unrelated fields directly.
- Introduce a `state` module that encapsulates all mutable data
structures:
- `SessionState`: session-persistent data (history, approved commands,
token/rate-limit info, maybe user preferences).
- `ActiveTurn`: metadata for the currently running turn (sub-id, task
kind, abort handle) and an `Arc<TurnState>`.
- `TurnState`: all turn-scoped pieces (pending inputs, approval waiters,
diff tracker, review history, auto-compact flags, last agent message,
outstanding tool call bookkeeping).
- Group long-lived helpers/managers into a dedicated `SessionServices`
struct so `Session` does not accumulate "random" fields.
- Provide clear, lock-safe APIs so other modules never touch raw
mutexes.
- Ensure every turn creates/drops a `TurnState` and that
interrupts/finishes delegate cleanup to it (see the sketch below).
2025-09-25 11:16:06 +01:00
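A minimal sketch of the shapes described above, with illustrative field sets (the real codex-rs definitions differ):
```rust
use std::collections::{HashMap, HashSet};
use std::sync::Arc;

// Session-persistent data: survives across turns.
struct SessionState {
    history: Vec<String>,                    // simplified conversation items
    approved_commands: HashSet<Vec<String>>, // argv vectors approved for the session
    token_usage: Option<u64>,                // stand-in for token/rate-limit info
}

// Turn-scoped data: created when a turn starts, dropped when it ends.
struct TurnState {
    pending_input: Vec<String>,               // queued user input for this turn
    pending_approvals: HashMap<String, bool>, // call_id -> decision, simplified
}

// Metadata for the currently running turn.
struct ActiveTurn {
    sub_id: String,
    turn_state: Arc<TurnState>,
}
```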
            state.approved_commands_ref(),
2025-08-05 20:44:20 -07:00
            params.with_escalated_permissions.unwrap_or(false),
2025-07-30 16:45:08 -07:00
        )
    };
    let command_for_display = params.command.clone();
    (params, safety, command_for_display)
2025-05-16 14:17:10 -07:00
}
};
2025-07-30 16:45:08 -07:00
2025-05-16 14:17:10 -07:00
let sandbox_type = match safety {
2025-09-29 19:30:55 +01:00
SafetyCheck::AutoApprove {
    sandbox_type,
    user_explicitly_approved,
} => {
    otel_event_manager.tool_decision(
        tool_name,
        call_id.as_str(),
        ReviewDecision::Approved,
        if user_explicitly_approved {
            ToolDecisionSource::User
        } else {
            ToolDecisionSource::Config
        },
    );
    sandbox_type
}
2025-05-16 14:17:10 -07:00
SafetyCheck::AskUser => {
2025-09-23 07:25:46 -07:00
let decision = sess
2025-05-16 14:17:10 -07:00
    .request_command_approval(
        sub_id.clone(),
2025-07-23 11:43:53 -07:00
        call_id.clone(),
2025-05-16 14:17:10 -07:00
        params.command.clone(),
        params.cwd.clone(),
2025-08-05 20:44:20 -07:00
        params.justification.clone(),
2025-05-16 14:17:10 -07:00
    )
    .await;
2025-09-23 07:25:46 -07:00
match decision {
2025-09-29 19:30:55 +01:00
ReviewDecision::Approved => {
    otel_event_manager.tool_decision(
        tool_name,
        call_id.as_str(),
        ReviewDecision::Approved,
        ToolDecisionSource::User,
    );
}
2025-05-16 14:17:10 -07:00
ReviewDecision::ApprovedForSession => {
2025-09-29 19:30:55 +01:00
    otel_event_manager.tool_decision(
        tool_name,
        call_id.as_str(),
        ReviewDecision::ApprovedForSession,
        ToolDecisionSource::User,
    );
2025-09-18 18:21:52 +01:00
    sess.add_approved_command(params.command.clone()).await;
2025-04-24 13:31:40 -07:00
}
2025-09-29 19:30:55 +01:00
ReviewDecision::Denied => {
    otel_event_manager.tool_decision(
        tool_name,
        call_id.as_str(),
        ReviewDecision::Denied,
        ToolDecisionSource::User,
    );
2025-09-24 10:27:35 -07:00
    return Err(FunctionCallError::RespondToModel(
        "exec command rejected by user".to_string(),
    ));
2025-04-24 13:31:40 -07:00
}
2025-09-29 19:30:55 +01:00
ReviewDecision::Abort => {
    otel_event_manager.tool_decision(
        tool_name,
        call_id.as_str(),
        ReviewDecision::Abort,
        ToolDecisionSource::User,
    );
    return Err(FunctionCallError::RespondToModel(
        "exec command aborted by user".to_string(),
    ));
}
2025-05-16 14:17:10 -07:00
}
// No sandboxing is applied because the user has given
// explicit approval. Often, we end up in this case because
// the command cannot be run in a sandbox, such as
// installing a new dependency that requires network access.
SandboxType::None
}
SafetyCheck::Reject { reason } => {
2025-09-29 19:30:55 +01:00
    otel_event_manager.tool_decision(
        tool_name,
        call_id.as_str(),
        ReviewDecision::Denied,
        ToolDecisionSource::Config,
    );
2025-09-24 10:27:35 -07:00
    return Err(FunctionCallError::RespondToModel(format!(
        "exec command rejected: {reason:?}"
    )));
2025-05-16 14:17:10 -07:00
}
};
2025-07-31 11:13:57 -07:00
let exec_command_context = ExecCommandContext {
    sub_id: sub_id.clone(),
    call_id: call_id.clone(),
    command_for_display: command_for_display.clone(),
    cwd: params.cwd.clone(),
    apply_patch: apply_patch_exec.map(
        |ApplyPatchExec {
             action,
             user_explicitly_approved_this_action,
         }| ApplyPatchCommandContext {
            user_explicitly_approved_this_action,
            changes: convert_apply_patch_to_protocol(&action),
        },
    ),
};
2025-05-16 14:17:10 -07:00
2025-08-20 16:30:34 -07:00
let params = maybe_translate_shell_command(params, sess, turn_context);
2025-08-06 23:25:56 -07:00
let output_result = sess
    .run_exec_with_events(
        turn_diff_tracker,
        exec_command_context.clone(),
        ExecInvokeArgs {
            params: params.clone(),
            sandbox_type,
2025-08-15 09:40:02 -07:00
            sandbox_policy: &turn_context.sandbox_policy,
2025-09-18 14:37:06 -07:00
            sandbox_cwd: &turn_context.cwd,
2025-09-25 11:16:06 +01:00
            codex_linux_sandbox_exe: &sess.services.codex_linux_sandbox_exe,
2025-08-26 19:16:51 -07:00
            stdout_stream: if exec_command_context.apply_patch.is_some() {
                None
            } else {
                Some(StdoutStream {
                    sub_id: sub_id.clone(),
                    call_id: call_id.clone(),
                    tx_event: sess.tx_event.clone(),
                })
            },
2025-08-06 23:25:56 -07:00
        },
    )
    .await;
2025-05-16 14:17:10 -07:00
match output_result {
    Ok(output) => {
2025-08-11 11:52:05 -07:00
        let ExecToolCallOutput { exit_code, .. } = &output;
2025-08-22 16:32:31 -07:00
        let content = format_exec_output(&output);
2025-09-24 10:27:35 -07:00
        if *exit_code == 0 {
            Ok(content)
        } else {
            Err(FunctionCallError::RespondToModel(content))
2025-05-16 14:17:10 -07:00
        }
    }
    Err(CodexErr::Sandbox(error)) => {
2025-08-04 08:57:04 -07:00
        handle_sandbox_error(
2025-09-29 19:30:55 +01:00
            tool_name,
2025-08-04 08:57:04 -07:00
            turn_diff_tracker,
            params,
            exec_command_context,
            error,
            sandbox_type,
            sess,
2025-08-15 09:40:02 -07:00
            turn_context,
2025-09-29 19:30:55 +01:00
            &otel_event_manager,
2025-08-04 08:57:04 -07:00
        )
        .await
2025-05-16 14:17:10 -07:00
    }
2025-09-24 10:27:35 -07:00
    Err(e) => Err(FunctionCallError::RespondToModel(format!(
        "execution error: {e:?}"
    ))),
2025-05-16 14:17:10 -07:00
}
}
2025-09-29 19:30:55 +01:00
#[allow(clippy::too_many_arguments)]
2025-06-25 12:36:10 -07:00
async fn handle_sandbox_error(
2025-09-29 19:30:55 +01:00
    tool_name: &str,
    turn_diff_tracker: &mut TurnDiffTracker,
    params: ExecParams,
    exec_command_context: ExecCommandContext,
    error: SandboxErr,
    sandbox_type: SandboxType,
    sess: &Session,
    turn_context: &TurnContext,
    otel_event_manager: &OtelEventManager,
) -> Result<String, FunctionCallError> {
    let call_id = exec_command_context.call_id.clone();
    let sub_id = exec_command_context.sub_id.clone();
    let cwd = exec_command_context.cwd.clone();

    if let SandboxErr::Timeout { output } = &error {
        let content = format_exec_output(output);
        return Err(FunctionCallError::RespondToModel(content));
    }

    // Early out if either the user never wants to be asked for approval, or
    // we're letting the model manage escalation requests. Otherwise, continue.
    match turn_context.approval_policy {
        AskForApproval::Never | AskForApproval::OnRequest => {
            return Err(FunctionCallError::RespondToModel(format!(
                "failed in sandbox {sandbox_type:?} with execution error: {error:?}"
            )));
        }
        AskForApproval::UnlessTrusted | AskForApproval::OnFailure => (),
    }

    // Note that when `error` is `SandboxErr::Denied`, it could be a false
    // positive. That is, it may have exited with a non-zero exit code, not
    // because the sandbox denied it, but because that is its expected behavior,
    // i.e., a grep command that did not match anything. Ideally we would
    // include additional metadata on the command to indicate whether non-zero
    // exit codes merit a retry.

    // For now, we categorically ask the user to retry without sandbox and
    // emit the raw error as a background event.
    sess.notify_background_event(&sub_id, format!("Execution failed: {error}"))
        .await;

    let decision = sess
        .request_command_approval(
            sub_id.clone(),
            call_id.clone(),
            params.command.clone(),
            cwd.clone(),
            Some("command failed; retry without sandbox?".to_string()),
        )
        .await;

    match decision {
        ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {
            // Persist this command as pre-approved for the remainder of the
            // session so future executions skip the sandbox directly.
            // TODO(ragona): Isn't this a bug? It always saves the command in an | fork?
            sess.add_approved_command(params.command.clone()).await;
            // Inform UI we are retrying without sandbox.
            sess.notify_background_event(&sub_id, "retrying command without sandbox")
                .await;

            otel_event_manager.tool_decision(
                tool_name,
                call_id.as_str(),
                decision,
                ToolDecisionSource::User,
            );

            // This is an escalated retry; the policy will not be
            // examined and the sandbox has been set to `None`.
            let retry_output_result = sess
                .run_exec_with_events(
                    turn_diff_tracker,
                    exec_command_context.clone(),
                    ExecInvokeArgs {
                        params,
                        sandbox_type: SandboxType::None,
                        sandbox_policy: &turn_context.sandbox_policy,
                        sandbox_cwd: &turn_context.cwd,
                        codex_linux_sandbox_exe: &sess.services.codex_linux_sandbox_exe,
                        stdout_stream: if exec_command_context.apply_patch.is_some() {
                            None
                        } else {
                            Some(StdoutStream {
                                sub_id: sub_id.clone(),
                                call_id: call_id.clone(),
                                tx_event: sess.tx_event.clone(),
                            })
                        },
                    },
                )
                .await;
            match retry_output_result {
                Ok(retry_output) => {
                    let ExecToolCallOutput { exit_code, .. } = &retry_output;
                    let content = format_exec_output(&retry_output);
                    if *exit_code == 0 {
                        Ok(content)
                    } else {
                        Err(FunctionCallError::RespondToModel(content))
                    }
                }
                Err(e) => Err(FunctionCallError::RespondToModel(format!(
                    "retry failed: {e}"
                ))),
            }
        }
        decision @ (ReviewDecision::Denied | ReviewDecision::Abort) => {
            otel_event_manager.tool_decision(
                tool_name,
                call_id.as_str(),
                decision,
                ToolDecisionSource::User,
            );

            // Fall through to original failure handling.
            Err(FunctionCallError::RespondToModel(
                "exec command rejected by user".to_string(),
            ))
        }
    }
}
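
/// Formats exec output for the model: prefixes a note when the command timed
/// out, then applies the head+tail truncation described in the comments below.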
fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
    let ExecToolCallOutput {
        aggregated_output, ..
    } = exec_output;

    // Head+tail truncation for the model: show the beginning and end with an
    // elision. Clients still receive full streams; only this formatted summary
    // is capped.
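    //
    // For example (line counts illustrative): a 100-line output with a
    // 10-line head budget and a 10-line tail budget is rendered as the first
    // 10 lines, a "[... omitted 80 of 100 lines ...]" marker, then the last
    // 10 lines, all further clamped to the byte budgets computed below.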

    let mut s = &aggregated_output.text;
    let prefixed_str: String;
    if exec_output.timed_out {
        prefixed_str = format!(
            "command timed out after {} milliseconds\n",
            exec_output.duration.as_millis()
        ) + s;
        s = &prefixed_str;
    }

    let total_lines = s.lines().count();
    if s.len() <= MODEL_FORMAT_MAX_BYTES && total_lines <= MODEL_FORMAT_MAX_LINES {
        return s.to_string();
    }

    let lines: Vec<&str> = s.lines().collect();
    let head_take = MODEL_FORMAT_HEAD_LINES.min(lines.len());
    let tail_take = MODEL_FORMAT_TAIL_LINES.min(lines.len().saturating_sub(head_take));
    let omitted = lines.len().saturating_sub(head_take + tail_take);
    // Join head and tail blocks (lines() strips newlines; reinsert them).
    let head_block = lines
        .iter()
        .take(head_take)
        .cloned()
        .collect::<Vec<_>>()
        .join("\n");
    let tail_block = if tail_take > 0 {
        lines[lines.len() - tail_take..].join("\n")
    } else {
        String::new()
    };
    let marker = format!("\n[... omitted {omitted} of {total_lines} lines ...]\n\n");
    // Byte budgets for head/tail around the marker.
    let mut head_budget = MODEL_FORMAT_HEAD_BYTES.min(MODEL_FORMAT_MAX_BYTES);
    let tail_budget = MODEL_FORMAT_MAX_BYTES.saturating_sub(head_budget + marker.len());
    if tail_budget == 0 && marker.len() >= MODEL_FORMAT_MAX_BYTES {
        // Degenerate case: marker alone exceeds budget; return a clipped marker.
        return take_bytes_at_char_boundary(&marker, MODEL_FORMAT_MAX_BYTES).to_string();
    }
    if tail_budget == 0 {
        // Make room for the marker by shrinking head.
        head_budget = MODEL_FORMAT_MAX_BYTES.saturating_sub(marker.len());
    }
    // Enforce line-count cap by trimming head/tail lines.
    let head_lines_text = head_block;
    let tail_lines_text = tail_block;
    // Build final string respecting byte budgets.
    let head_part = take_bytes_at_char_boundary(&head_lines_text, head_budget);
    let mut result = String::with_capacity(MODEL_FORMAT_MAX_BYTES.min(s.len()));

    result.push_str(head_part);
    result.push_str(&marker);
    let remaining = MODEL_FORMAT_MAX_BYTES.saturating_sub(result.len());
    let tail_budget_final = remaining;
    let tail_part = take_last_bytes_at_char_boundary(&tail_lines_text, tail_budget_final);
    result.push_str(tail_part);
    result
}

// Truncate a &str to a byte budget at a char boundary (prefix).
#[inline]
fn take_bytes_at_char_boundary(s: &str, maxb: usize) -> &str {
    if s.len() <= maxb {
        return s;
    }
    let mut last_ok = 0;
    for (i, ch) in s.char_indices() {
        let nb = i + ch.len_utf8();
        if nb > maxb {
            break;
        }
        last_ok = nb;
    }
    &s[..last_ok]
}
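
// Example (illustrative): for s = "aé" (bytes 0x61 0xC3 0xA9) and maxb = 2,
// the two-byte 'é' would be split by a naive slice, so
// `take_bytes_at_char_boundary` keeps only "a".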

// Take a suffix of a &str within a byte budget at a char boundary.
#[inline]
fn take_last_bytes_at_char_boundary(s: &str, maxb: usize) -> &str {
    if s.len() <= maxb {
        return s;
    }
    let mut start = s.len();
    let mut used = 0usize;
    for (i, ch) in s.char_indices().rev() {
        let nb = ch.len_utf8();
        if used + nb > maxb {
            break;
        }
        start = i;
        used += nb;
        if start == 0 {
            break;
        }
    }
    &s[start..]
}
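
// Example (illustrative): for s = "éa" and maxb = 2, the suffix cannot
// include both the two-byte 'é' and 'a' (3 bytes total), so
// `take_last_bytes_at_char_boundary` returns "a".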

/// Exec output is a pre-serialized JSON payload.
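/// The serialized shape is roughly (values illustrative):
/// `{"output":"<formatted output>","metadata":{"exit_code":0,"duration_seconds":1.2}}`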
fn format_exec_output(exec_output: &ExecToolCallOutput) -> String {
    let ExecToolCallOutput {
        exit_code,
        duration,
        ..
    } = exec_output;

    #[derive(Serialize)]
    struct ExecMetadata {
        exit_code: i32,
        duration_seconds: f32,
    }

    #[derive(Serialize)]
    struct ExecOutput<'a> {
        output: &'a str,
        metadata: ExecMetadata,
    }

    // Round to 1 decimal place.
    let duration_seconds = ((duration.as_secs_f32()) * 10.0).round() / 10.0;
    let formatted_output = format_exec_output_str(exec_output);

    let payload = ExecOutput {
        output: &formatted_output,
        metadata: ExecMetadata {
            exit_code: *exit_code,
            duration_seconds,
        },
    };

    #[expect(clippy::expect_used)]
    serde_json::to_string(&payload).expect("serialize ExecOutput")
}
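
/// Scans the turn's responses in reverse and returns the text of the most
/// recent assistant `OutputText` item, if any.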
pub(super) fn get_last_assistant_message_from_turn(responses: &[ResponseItem]) -> Option<String> {
    responses.iter().rev().find_map(|item| {
        if let ResponseItem::Message { role, content, .. } = item {
            if role == "assistant" {
                content.iter().rev().find_map(|ci| {
                    if let ContentItem::OutputText { text } = ci {
                        Some(text.clone())
                    } else {
                        None
                    }
                })
            } else {
                None
            }
        } else {
            None
        }
    })
}

fn convert_call_tool_result_to_function_call_output_payload(
    call_tool_result: &CallToolResult,
) -> FunctionCallOutputPayload {
    let CallToolResult {
        content,
        is_error,
        structured_content,
    } = call_tool_result;

    // In terms of what to send back to the model, we prefer structured_content,
    // if available, and fall back to content otherwise.
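    //
    // For example (illustrative): a tool result whose structured_content is
    // the JSON value {"answer": 42} is forwarded to the model as the string
    // "{\"answer\":42}", even when `content` also carries a human-readable
    // rendering of the same data.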
    let mut is_success = is_error != &Some(true);
    let content = if let Some(structured_content) = structured_content
        && structured_content != &serde_json::Value::Null
        && let Ok(serialized_structured_content) = serde_json::to_string(&structured_content)
    {
        serialized_structured_content
    } else {
        match serde_json::to_string(&content) {
            Ok(serialized_content) => serialized_content,
            Err(err) => {
                // If we could not serialize either content or structured_content to
                // JSON, flag this as an error.
                is_success = false;
                err.to_string()
            }
        }
    };

    FunctionCallOutputPayload {
        content,
        success: Some(is_success),
    }
}

/// Emits an ExitedReviewMode Event with optional ReviewOutput,
/// and records a developer message with the review output.
pub(crate) async fn exit_review_mode(
    session: Arc<Session>,
    task_sub_id: String,
    review_output: Option<ReviewOutputEvent>,
) {
    let event = Event {
        id: task_sub_id,
        msg: EventMsg::ExitedReviewMode(ExitedReviewModeEvent {
            review_output: review_output.clone(),
        }),
    };
    session.send_event(event).await;

    let mut user_message = String::new();
    if let Some(out) = review_output {
        let mut findings_str = String::new();
        let text = out.overall_explanation.trim();
        if !text.is_empty() {
            findings_str.push_str(text);
        }
        if !out.findings.is_empty() {
            let block = format_review_findings_block(&out.findings, None);
            findings_str.push_str(&format!("\n{block}"));
        }
        user_message.push_str(&format!(
            r#"<user_action>
<context>User initiated a review task. Here's the full review output from reviewer model. User may select one or more comments to resolve.</context>
<action>review</action>
<results>
{findings_str}
</results>
</user_action>
"#));
    } else {
        user_message.push_str(r#"<user_action>
<context>User initiated a review task, but was interrupted. If user asks about this, tell them to re-initiate a review with `/review` and wait for it to complete.</context>
<action>review</action>
<results>
None.
</results>
</user_action>
"#);
    }
    session
        .record_conversation_items(&[ResponseItem::Message {
            id: None,
            role: "user".to_string(),
            content: vec![ContentItem::InputText { text: user_message }],
        }])
        .await;
}
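When the review is interrupted (the `review_output = None` branch above), the synthetic user message recorded into the parent history is fixed, modulo whitespace:
```xml
<user_action>
<context>User initiated a review task, but was interrupted. If user asks about this, tell them to re-initiate a review with `/review` and wait for it to complete.</context>
<action>review</action>
<results>
None.
</results>
</user_action>
```
The `abort_review_task_emits_exited_then_aborted_and_records_history` test below asserts that exactly this kind of message lands in history.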
#[cfg(test)]
pub(crate) use tests::make_session_and_context;

#[cfg(test)]
mod tests {
    use super::*;

    use crate::config::ConfigOverrides;
    use crate::config::ConfigToml;
    use crate::protocol::CompactedItem;
    use crate::protocol::InitialHistory;
    use crate::protocol::ResumedHistory;
    use crate::state::TaskKind;
    use crate::tasks::SessionTask;
    use crate::tasks::SessionTaskContext;
    use codex_protocol::mcp_protocol::AuthMode;
    use codex_protocol::models::ContentItem;
    use codex_protocol::models::ResponseItem;
    use mcp_types::ContentBlock;
    use mcp_types::TextContent;
    use pretty_assertions::assert_eq;
    use serde::Deserialize;
    use serde_json::json;
    use std::path::PathBuf;
    use std::sync::Arc;
    use std::time::Duration as StdDuration;
    use tokio::time::Duration;
    use tokio::time::sleep;
    #[test]
    fn reconstruct_history_matches_live_compactions() {
        let (session, turn_context) = make_session_and_context();
        let (rollout_items, expected) = sample_rollout(&session, &turn_context);
        let reconstructed = session.reconstruct_history_from_rollout(&turn_context, &rollout_items);
        assert_eq!(expected, reconstructed);
    }
    #[test]
    fn record_initial_history_reconstructs_resumed_transcript() {
        let (session, turn_context) = make_session_and_context();
        let (rollout_items, expected) = sample_rollout(&session, &turn_context);
        tokio_test::block_on(session.record_initial_history(
            &turn_context,
            InitialHistory::Resumed(ResumedHistory {
                conversation_id: ConversationId::default(),
                history: rollout_items,
                rollout_path: PathBuf::from("/tmp/resume.jsonl"),
            }),
        ));
ref: full state refactor (#4174)
## Current State Observations
- `Session` currently holds many unrelated responsibilities (history,
approval queues, task handles, rollout recorder, shell discovery, token
tracking, etc.), making it hard to reason about ownership and lifetimes.
- The anonymous `State` struct inside `codex.rs` mixes session-long data
with turn-scoped queues and approval bookkeeping.
- Turn execution (`run_task`) relies on ad-hoc local variables that
should conceptually belong to a per-turn state object.
- External modules (`codex::compact`, tests) frequently poke the raw
`Session.state` mutex, which couples them to implementation details.
- Interrupts, approvals, and rollout persistence all have bespoke
cleanup paths, contributing to subtle bugs when a turn is aborted
mid-flight.
## Desired End State
- Keep a slim `Session` object that acts as the orchestrator and façade.
It should expose a focused API (submit, approvals, interrupts, event
emission) without storing unrelated fields directly.
- Introduce a `state` module that encapsulates all mutable data
structures:
- `SessionState`: session-persistent data (history, approved commands,
token/rate-limit info, maybe user preferences).
- `ActiveTurn`: metadata for the currently running turn (sub-id, task
kind, abort handle) and an `Arc<TurnState>`.
- `TurnState`: all turn-scoped pieces (pending inputs, approval waiters,
diff tracker, review history, auto-compact flags, last agent message,
outstanding tool call bookkeeping).
- Group long-lived helpers/managers into a dedicated `SessionServices`
struct so `Session` does not accumulate "random" fields.
- Provide clear, lock-safe APIs so other modules never touch raw
mutexes.
- Ensure every turn creates/drops a `TurnState` and that
interrupts/finishes delegate cleanup to it.
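As a rough sketch of the shape that description implies (field types here are placeholders, not the actual definitions):
```rust
use std::collections::HashMap;
use std::sync::Arc;

// Placeholder field types; the real state module defines richer types
// (ConversationHistory, approval senders, diff trackers, etc.).
struct SessionState {
    history: Vec<String>,                // session-long conversation history
    approved_commands: Vec<Vec<String>>, // commands approved for the session
    token_usage: Option<u64>,            // token/rate-limit bookkeeping
}

struct TurnState {
    pending_input: Vec<String>,               // input queued for the running turn
    pending_approvals: HashMap<String, bool>, // outstanding approval waiters
    last_agent_message: Option<String>,
}

struct ActiveTurn {
    sub_id: String, // submission id of the running turn
    turn_state: Arc<TurnState>,
}
```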
        let actual = tokio_test::block_on(async { session.state.lock().await.history_snapshot() });
        assert_eq!(expected, actual);
    }

    #[test]
    fn record_initial_history_reconstructs_forked_transcript() {
        let (session, turn_context) = make_session_and_context();
        let (rollout_items, expected) = sample_rollout(&session, &turn_context);
        tokio_test::block_on(
            session.record_initial_history(&turn_context, InitialHistory::Forked(rollout_items)),
        );
        let actual = tokio_test::block_on(async { session.state.lock().await.history_snapshot() });
        assert_eq!(expected, actual);
    }
    #[test]
    fn prefers_structured_content_when_present() {
        let ctr = CallToolResult {
            // Content present but should be ignored because structured_content is set.
            content: vec![text_block("ignored")],
            is_error: None,
            structured_content: Some(json!({
                "ok": true,
                "value": 42
            })),
        };
        let got = convert_call_tool_result_to_function_call_output_payload(&ctr);
        let expected = FunctionCallOutputPayload {
            content: serde_json::to_string(&json!({
                "ok": true,
                "value": 42
            }))
            .unwrap(),
            success: Some(true),
        };
        assert_eq!(expected, got);
    }
    #[test]
    fn model_truncation_head_tail_by_lines() {
        // Build 400 short lines so the line-count limit, not the byte budget, triggers truncation.
        let lines: Vec<String> = (1..=400).map(|i| format!("line {i}")).collect();
        let full = lines.join("\n");
        let exec = ExecToolCallOutput {
            exit_code: 0,
            stdout: StreamOutput::new(String::new()),
            stderr: StreamOutput::new(String::new()),
            aggregated_output: StreamOutput::new(full),
            duration: StdDuration::from_secs(1),
            timed_out: false,
        };
        let out = format_exec_output_str(&exec);
        // Expect an elision marker with the correct counts.
        let omitted = 400 - MODEL_FORMAT_MAX_LINES; // 144
        let marker = format!("\n[... omitted {omitted} of 400 lines ...]\n\n");
        assert!(out.contains(&marker), "missing marker: {out}");
        // Validate head and tail.
        let parts: Vec<&str> = out.split(&marker).collect();
        assert_eq!(parts.len(), 2, "expected one marker split");
        let head = parts[0];
        let tail = parts[1];
        let expected_head: String = (1..=MODEL_FORMAT_HEAD_LINES)
            .map(|i| format!("line {i}"))
            .collect::<Vec<_>>()
            .join("\n");
        assert!(head.starts_with(&expected_head), "head mismatch");
        let expected_tail: String = ((400 - MODEL_FORMAT_TAIL_LINES + 1)..=400)
            .map(|i| format!("line {i}"))
            .collect::<Vec<_>>()
            .join("\n");
        assert!(tail.ends_with(&expected_tail), "tail mismatch");
    }
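    // The assertions above imply MODEL_FORMAT_MAX_LINES = 256 (144 of 400
    // lines are omitted). A sketch of the head/tail elision this test
    // exercises follows; the constants and the exact head/tail split are
    // assumptions inferred from the assertions, not taken from the real
    // implementation, which also enforces a byte budget.
    //
    //     const MAX_LINES: usize = 256;
    //     const HEAD_LINES: usize = 128;
    //     const TAIL_LINES: usize = MAX_LINES - HEAD_LINES;
    //
    //     fn truncate_head_tail(s: &str) -> String {
    //         let lines: Vec<&str> = s.lines().collect();
    //         if lines.len() <= MAX_LINES {
    //             return s.to_string();
    //         }
    //         let omitted = lines.len() - MAX_LINES;
    //         format!(
    //             "{}\n[... omitted {omitted} of {} lines ...]\n\n{}",
    //             lines[..HEAD_LINES].join("\n"),
    //             lines.len(),
    //             lines[lines.len() - TAIL_LINES..].join("\n"),
    //         )
    //     }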
    #[test]
    fn model_truncation_respects_byte_budget() {
        // Construct a large output (about 100 kB) so the byte budget dominates.
        let big_line = "x".repeat(100);
        let full = std::iter::repeat_n(big_line, 1000)
            .collect::<Vec<_>>()
            .join("\n");
        let exec = ExecToolCallOutput {
            exit_code: 0,
            stdout: StreamOutput::new(String::new()),
            stderr: StreamOutput::new(String::new()),
            aggregated_output: StreamOutput::new(full.clone()),
            duration: StdDuration::from_secs(1),
            timed_out: false,
        };
        let out = format_exec_output_str(&exec);
        assert!(out.len() <= MODEL_FORMAT_MAX_BYTES, "exceeds byte budget");
        assert!(out.contains("omitted"), "should contain elision marker");
        // Ensure head and tail are drawn from the original.
        assert!(full.starts_with(out.chars().take(8).collect::<String>().as_str()));
        assert!(
            full.ends_with(
                out.chars()
                    .rev()
                    .take(8)
                    .collect::<String>()
                    .chars()
                    .rev()
                    .collect::<String>()
                    .as_str()
            )
        );
    }
    #[test]
    fn includes_timed_out_message() {
        let exec = ExecToolCallOutput {
            exit_code: 0,
            stdout: StreamOutput::new(String::new()),
            stderr: StreamOutput::new(String::new()),
            aggregated_output: StreamOutput::new("Command output".to_string()),
            duration: StdDuration::from_secs(1),
            timed_out: true,
        };
        let out = format_exec_output_str(&exec);
        assert_eq!(
            out,
            "command timed out after 1000 milliseconds\nCommand output"
        );
    }
    #[test]
    fn falls_back_to_content_when_structured_is_null() {
        let ctr = CallToolResult {
            content: vec![text_block("hello"), text_block("world")],
            is_error: None,
            structured_content: Some(serde_json::Value::Null),
        };
        let got = convert_call_tool_result_to_function_call_output_payload(&ctr);
        let expected = FunctionCallOutputPayload {
            content: serde_json::to_string(&vec![text_block("hello"), text_block("world")])
                .unwrap(),
            success: Some(true),
        };
        assert_eq!(expected, got);
    }

    #[test]
    fn success_flag_reflects_is_error_true() {
        let ctr = CallToolResult {
            content: vec![text_block("unused")],
            is_error: Some(true),
            structured_content: Some(json!({"message": "bad"})),
        };
        let got = convert_call_tool_result_to_function_call_output_payload(&ctr);
        let expected = FunctionCallOutputPayload {
            content: serde_json::to_string(&json!({"message": "bad"})).unwrap(),
            success: Some(false),
        };
        assert_eq!(expected, got);
    }

    #[test]
    fn success_flag_true_with_no_error_and_content_used() {
        let ctr = CallToolResult {
            content: vec![text_block("alpha")],
            is_error: Some(false),
            structured_content: None,
        };
        let got = convert_call_tool_result_to_function_call_output_payload(&ctr);
        let expected = FunctionCallOutputPayload {
            content: serde_json::to_string(&vec![text_block("alpha")]).unwrap(),
            success: Some(true),
        };
        assert_eq!(expected, got);
    }
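    // Taken together, these conversion tests pin down a simple precedence
    // rule: non-null structured_content wins over content, and is_error
    // drives the success flag. A sketch of that rule (simplified error
    // handling; not the actual function body):
    //
    //     fn convert_sketch(ctr: &CallToolResult) -> FunctionCallOutputPayload {
    //         let content = match &ctr.structured_content {
    //             // Non-null structured content takes precedence.
    //             Some(v) if !v.is_null() => serde_json::to_string(v).unwrap(),
    //             // Otherwise serialize the content blocks.
    //             _ => serde_json::to_string(&ctr.content).unwrap(),
    //         };
    //         FunctionCallOutputPayload {
    //             content,
    //             // is_error = Some(true) maps to success = Some(false).
    //             success: Some(!ctr.is_error.unwrap_or(false)),
    //         }
    //     }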
    fn text_block(s: &str) -> ContentBlock {
        ContentBlock::TextContent(TextContent {
            annotations: None,
            text: s.to_string(),
            r#type: "text".to_string(),
        })
    }
    fn otel_event_manager(conversation_id: ConversationId, config: &Config) -> OtelEventManager {
        OtelEventManager::new(
            conversation_id,
            config.model.as_str(),
            config.model_family.slug.as_str(),
            None,
            Some(AuthMode::ChatGPT),
            false,
            "test".to_string(),
        )
    }
    pub(crate) fn make_session_and_context() -> (Session, TurnContext) {
        let (tx_event, _rx_event) = async_channel::unbounded();
        let codex_home = tempfile::tempdir().expect("create temp dir");
        let config = Config::load_from_base_config_with_overrides(
            ConfigToml::default(),
            ConfigOverrides::default(),
            codex_home.path().to_path_buf(),
        )
        .expect("load default test config");
        let config = Arc::new(config);
        let conversation_id = ConversationId::default();
        let otel_event_manager = otel_event_manager(conversation_id, config.as_ref());
        let client = ModelClient::new(
            config.clone(),
            None,
            otel_event_manager,
            config.model_provider.clone(),
            config.model_reasoning_effort,
            config.model_reasoning_summary,
            conversation_id,
        );
        let tools_config = ToolsConfig::new(&ToolsConfigParams {
            model_family: &config.model_family,
            include_plan_tool: config.include_plan_tool,
            include_apply_patch_tool: config.include_apply_patch_tool,
            include_web_search_request: config.tools_web_search_request,
            use_streamable_shell_tool: config.use_experimental_streamable_shell_tool,
            include_view_image_tool: config.include_view_image_tool,
            experimental_unified_exec_tool: config.use_experimental_unified_exec_tool,
        });
        let turn_context = TurnContext {
            client,
            cwd: config.cwd.clone(),
            base_instructions: config.base_instructions.clone(),
            user_instructions: config.user_instructions.clone(),
            approval_policy: config.approval_policy,
            sandbox_policy: config.sandbox_policy.clone(),
            shell_environment_policy: config.shell_environment_policy.clone(),
            tools_config,
            is_review_mode: false,
            final_output_json_schema: None,
        };
        let services = SessionServices {
            mcp_connection_manager: McpConnectionManager::default(),
            session_manager: ExecSessionManager::default(),
            unified_exec_manager: UnifiedExecSessionManager::default(),
            notifier: UserNotifier::default(),
            rollout: Mutex::new(None),
            codex_linux_sandbox_exe: None,
            user_shell: shell::Shell::Unknown,
            show_raw_agent_reasoning: config.show_raw_agent_reasoning,
        };
        let session = Session {
            conversation_id,
            tx_event,
            state: Mutex::new(SessionState::new()),
            active_turn: Mutex::new(None),
            services,
            next_internal_sub_id: AtomicU64::new(0),
        };
        (session, turn_context)
    }
    // Like make_session_and_context, but returns Arc<Session> and the event
    // receiver so tests can assert on emitted events.
    fn make_session_and_context_with_rx() -> (
        Arc<Session>,
        Arc<TurnContext>,
        async_channel::Receiver<Event>,
    ) {
        let (tx_event, rx_event) = async_channel::unbounded();
        let codex_home = tempfile::tempdir().expect("create temp dir");
        let config = Config::load_from_base_config_with_overrides(
            ConfigToml::default(),
            ConfigOverrides::default(),
            codex_home.path().to_path_buf(),
        )
        .expect("load default test config");
        let config = Arc::new(config);
        let conversation_id = ConversationId::default();
        let otel_event_manager = otel_event_manager(conversation_id, config.as_ref());
        let client = ModelClient::new(
            config.clone(),
            None,
            otel_event_manager,
            config.model_provider.clone(),
            config.model_reasoning_effort,
            config.model_reasoning_summary,
            conversation_id,
        );
        let tools_config = ToolsConfig::new(&ToolsConfigParams {
            model_family: &config.model_family,
            include_plan_tool: config.include_plan_tool,
            include_apply_patch_tool: config.include_apply_patch_tool,
            include_web_search_request: config.tools_web_search_request,
            use_streamable_shell_tool: config.use_experimental_streamable_shell_tool,
            include_view_image_tool: config.include_view_image_tool,
            experimental_unified_exec_tool: config.use_experimental_unified_exec_tool,
        });
        let turn_context = Arc::new(TurnContext {
            client,
            cwd: config.cwd.clone(),
            base_instructions: config.base_instructions.clone(),
            user_instructions: config.user_instructions.clone(),
            approval_policy: config.approval_policy,
            sandbox_policy: config.sandbox_policy.clone(),
            shell_environment_policy: config.shell_environment_policy.clone(),
            tools_config,
            is_review_mode: false,
            final_output_json_schema: None,
        });
        let services = SessionServices {
            mcp_connection_manager: McpConnectionManager::default(),
            session_manager: ExecSessionManager::default(),
            unified_exec_manager: UnifiedExecSessionManager::default(),
            notifier: UserNotifier::default(),
            rollout: Mutex::new(None),
            codex_linux_sandbox_exe: None,
            user_shell: shell::Shell::Unknown,
            show_raw_agent_reasoning: config.show_raw_agent_reasoning,
        };
        let session = Arc::new(Session {
            conversation_id,
            tx_event,
            state: Mutex::new(SessionState::new()),
            active_turn: Mutex::new(None),
            services,
            next_internal_sub_id: AtomicU64::new(0),
        });
        (session, turn_context, rx_event)
    }
    #[derive(Clone, Copy)]
    struct NeverEndingTask(TaskKind);

    #[async_trait::async_trait]
    impl SessionTask for NeverEndingTask {
        fn kind(&self) -> TaskKind {
            self.0
        }

        async fn run(
            self: Arc<Self>,
            _session: Arc<SessionTaskContext>,
            _ctx: Arc<TurnContext>,
            _sub_id: String,
            _input: Vec<InputItem>,
        ) -> Option<String> {
            loop {
                sleep(Duration::from_secs(60)).await;
            }
        }

        async fn abort(&self, session: Arc<SessionTaskContext>, sub_id: &str) {
            if let TaskKind::Review = self.0 {
                exit_review_mode(session.clone_session(), sub_id.to_string(), None).await;
            }
        }
    }
    #[tokio::test]
    async fn abort_regular_task_emits_turn_aborted_only() {
        let (sess, tc, rx) = make_session_and_context_with_rx();
        let sub_id = "sub-regular".to_string();
        let input = vec![InputItem::Text {
            text: "hello".to_string(),
        }];
        sess.spawn_task(
            Arc::clone(&tc),
            sub_id.clone(),
            input,
            NeverEndingTask(TaskKind::Regular),
        )
        .await;
        sess.abort_all_tasks(TurnAbortReason::Interrupted).await;
        let evt = rx.recv().await.expect("event");
        match evt.msg {
            EventMsg::TurnAborted(e) => assert_eq!(TurnAbortReason::Interrupted, e.reason),
            other => panic!("unexpected event: {other:?}"),
        }
        assert!(rx.try_recv().is_err());
    }
    #[tokio::test]
    async fn abort_review_task_emits_exited_then_aborted_and_records_history() {
        let (sess, tc, rx) = make_session_and_context_with_rx();
        let sub_id = "sub-review".to_string();
        let input = vec![InputItem::Text {
            text: "start review".to_string(),
        }];
        sess.spawn_task(
            Arc::clone(&tc),
            sub_id.clone(),
            input,
            NeverEndingTask(TaskKind::Review),
        )
        .await;
        sess.abort_all_tasks(TurnAbortReason::Interrupted).await;
        let first = rx.recv().await.expect("first event");
        match first.msg {
            EventMsg::ExitedReviewMode(ev) => assert!(ev.review_output.is_none()),
            other => panic!("unexpected first event: {other:?}"),
        }
        let second = rx.recv().await.expect("second event");
        match second.msg {
            EventMsg::TurnAborted(e) => assert_eq!(TurnAbortReason::Interrupted, e.reason),
            other => panic!("unexpected second event: {other:?}"),
        }
        let history = sess.history_snapshot().await;
        let found = history.iter().any(|item| match item {
            ResponseItem::Message { role, content, .. } if role == "user" => {
                content.iter().any(|ci| match ci {
                    ContentItem::InputText { text } => {
                        text.contains("<user_action>")
                            && text.contains("review")
                            && text.contains("interrupted")
                    }
                    _ => false,
                })
            }
            _ => false,
        });
        assert!(
            found,
            "synthetic review interruption not recorded in history"
        );
    }
2025-09-14 09:23:31 -04:00
fn sample_rollout(
    session: &Session,
    turn_context: &TurnContext,
) -> (Vec<RolloutItem>, Vec<ResponseItem>) {
    let mut rollout_items = Vec::new();
    let mut live_history = ConversationHistory::new();

    // Seed both the rollout and the live history with the initial context.
    let initial_context = session.build_initial_context(turn_context);
    for item in &initial_context {
        rollout_items.push(RolloutItem::ResponseItem(item.clone()));
    }
    live_history.record_items(initial_context.iter());

    // Turn one: user message plus assistant reply.
    let user1 = ResponseItem::Message {
        id: None,
        role: "user".to_string(),
        content: vec![ContentItem::InputText {
            text: "first user".to_string(),
        }],
    };
    live_history.record_items(std::iter::once(&user1));
    rollout_items.push(RolloutItem::ResponseItem(user1.clone()));

    let assistant1 = ResponseItem::Message {
        id: None,
        role: "assistant".to_string(),
        content: vec![ContentItem::OutputText {
            text: "assistant reply one".to_string(),
        }],
    };
    live_history.record_items(std::iter::once(&assistant1));
    rollout_items.push(RolloutItem::ResponseItem(assistant1.clone()));

    // First compaction: rebuild the live history around the summary and record
    // a Compacted marker in the rollout.
    let summary1 = "summary one";
    let snapshot1 = live_history.contents();
    let user_messages1 = collect_user_messages(&snapshot1);
    let rebuilt1 = build_compacted_history(
        session.build_initial_context(turn_context),
        &user_messages1,
        summary1,
    );
    live_history.replace(rebuilt1);
    rollout_items.push(RolloutItem::Compacted(CompactedItem {
        message: summary1.to_string(),
    }));

    // Turn two, followed by a second compaction.
    let user2 = ResponseItem::Message {
        id: None,
        role: "user".to_string(),
        content: vec![ContentItem::InputText {
            text: "second user".to_string(),
        }],
    };
    live_history.record_items(std::iter::once(&user2));
    rollout_items.push(RolloutItem::ResponseItem(user2.clone()));

    let assistant2 = ResponseItem::Message {
        id: None,
        role: "assistant".to_string(),
        content: vec![ContentItem::OutputText {
            text: "assistant reply two".to_string(),
        }],
    };
    live_history.record_items(std::iter::once(&assistant2));
    rollout_items.push(RolloutItem::ResponseItem(assistant2.clone()));

    let summary2 = "summary two";
    let snapshot2 = live_history.contents();
    let user_messages2 = collect_user_messages(&snapshot2);
    let rebuilt2 = build_compacted_history(
        session.build_initial_context(turn_context),
        &user_messages2,
        summary2,
    );
    live_history.replace(rebuilt2);
    rollout_items.push(RolloutItem::Compacted(CompactedItem {
        message: summary2.to_string(),
    }));

    // Turn three: no compaction afterwards.
    let user3 = ResponseItem::Message {
        id: None,
        role: "user".to_string(),
        content: vec![ContentItem::InputText {
            text: "third user".to_string(),
        }],
    };
    live_history.record_items(std::iter::once(&user3));
    rollout_items.push(RolloutItem::ResponseItem(user3.clone()));

    let assistant3 = ResponseItem::Message {
        id: None,
        role: "assistant".to_string(),
        content: vec![ContentItem::OutputText {
            text: "assistant reply three".to_string(),
        }],
    };
    live_history.record_items(std::iter::once(&assistant3));
    rollout_items.push(RolloutItem::ResponseItem(assistant3.clone()));

    (rollout_items, live_history.contents())
}
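
// A minimal replay sketch (illustrative only; `replay_rollout_sketch` is a
// hypothetical helper, not part of the suite). It spells out the invariant the
// rollout above encodes: `ResponseItem` entries are recorded verbatim, while
// each `Compacted` entry rebuilds the history from the initial context plus the
// user messages seen so far, mirroring what `sample_rollout` does inline.
#[allow(dead_code)]
fn replay_rollout_sketch(
    session: &Session,
    turn_context: &TurnContext,
    rollout_items: &[RolloutItem],
) -> Vec<ResponseItem> {
    let mut history = ConversationHistory::new();
    for item in rollout_items {
        match item {
            RolloutItem::ResponseItem(response_item) => {
                history.record_items(std::iter::once(response_item));
            }
            RolloutItem::Compacted(compacted) => {
                // Rebuild from scratch, exactly as the compaction path does.
                let snapshot = history.contents();
                let user_messages = collect_user_messages(&snapshot);
                let rebuilt = build_compacted_history(
                    session.build_initial_context(turn_context),
                    &user_messages,
                    &compacted.message,
                );
                history.replace(rebuilt);
            }
            // Other rollout item kinds are irrelevant to history reconstruction.
            _ => {}
        }
    }
    history.contents()
}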
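/// A command that requests escalated permissions must be rejected up front
/// whenever the approval policy is anything other than `OnRequest`; retrying
/// the same command without the escalation flag should then succeed.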
#[tokio::test]
async fn rejects_escalated_permissions_when_policy_not_on_request() {
    use crate::exec::ExecParams;
    use crate::protocol::AskForApproval;
    use crate::protocol::SandboxPolicy;
    use crate::turn_diff_tracker::TurnDiffTracker;
    use std::collections::HashMap;

    let (session, mut turn_context) = make_session_and_context();

    // Ensure the policy is NOT OnRequest so the early rejection path triggers.
    turn_context.approval_policy = AskForApproval::OnFailure;

    let params = ExecParams {
        command: if cfg!(windows) {
            vec![
                "cmd.exe".to_string(),
                "/C".to_string(),
                "echo hi".to_string(),
            ]
        } else {
            vec![
                "/bin/sh".to_string(),
                "-c".to_string(),
                "echo hi".to_string(),
            ]
        },
        cwd: turn_context.cwd.clone(),
        timeout_ms: Some(1000),
        env: HashMap::new(),
        with_escalated_permissions: Some(true),
        justification: Some("test".to_string()),
    };

    // Identical command, but with the escalation request dropped.
    let params2 = ExecParams {
        with_escalated_permissions: Some(false),
        ..params.clone()
    };

    let mut turn_diff_tracker = TurnDiffTracker::new();
    let tool_name = "shell";
    let sub_id = "test-sub".to_string();
    let call_id = "test-call".to_string();
    let resp = handle_container_exec_with_params(
        tool_name,
        params,
        &session,
        &turn_context,
        &mut turn_diff_tracker,
        sub_id,
        call_id,
    )
    .await;
    let Err(FunctionCallError::RespondToModel(output)) = resp else {
        panic!("expected error result");
    };
    let expected = format!(
        "approval policy is {policy:?}; reject command — you should not ask for escalated permissions if the approval policy is {policy:?}",
        policy = turn_context.approval_policy
    );
    pretty_assertions::assert_eq!(output, expected);
    // Now retry the same command WITHOUT escalated permissions; it should succeed.
    // Force DangerFullAccess to avoid platform sandbox dependencies in tests.
    turn_context.sandbox_policy = SandboxPolicy::DangerFullAccess;
    let resp2 = handle_container_exec_with_params(
        tool_name,
        params2,
        &session,
        &turn_context,
        &mut turn_diff_tracker,
        "test-sub".to_string(),
        "test-call-2".to_string(),
    )
    .await;
    let output = resp2.expect("expected Ok result");
    #[derive(Deserialize, PartialEq, Eq, Debug)]
    struct ResponseExecMetadata {
        exit_code: i32,
    }

    #[derive(Deserialize)]
    struct ResponseExecOutput {
        output: String,
        metadata: ResponseExecMetadata,
    }

    let exec_output: ResponseExecOutput =
        serde_json::from_str(&output).expect("valid exec output json");
    pretty_assertions::assert_eq!(exec_output.metadata, ResponseExecMetadata { exit_code: 0 });
    assert!(exec_output.output.contains("hi"));
}
}