2025-10-20 16:23:26 -07:00
use std::collections::HashMap;
OpenTelemetry events (#2103)
## otel
Codex can emit [OpenTelemetry](https://opentelemetry.io/) **log events** that
describe each run: outbound API requests, streamed responses, user input,
tool-approval decisions, and the result of every tool invocation. Export is
**disabled by default** so local runs remain self-contained. Opt in by adding
an `[otel]` table and choosing an exporter.
```toml
[otel]
environment = "staging" # defaults to "dev"
exporter = "none" # defaults to "none"; set to otlp-http or otlp-grpc to send events
log_user_prompt = false # defaults to false; prompt text is redacted unless explicitly enabled
```
Codex tags every exported event with `service.name = "codex-cli"`, the CLI
version, and an `env` attribute so downstream collectors can distinguish
dev/staging/prod traffic. Only telemetry produced inside the `codex_otel`
crate—the events listed below—is forwarded to the exporter.
### Event catalog
Every event shares a common set of metadata fields: `event.timestamp`,
`conversation.id`, `app.version`, `auth_mode` (when available),
`user.account_id` (when available), `terminal.type`, `model`, and `slug`.
With OTEL enabled Codex emits the following event types (in addition to the
metadata above):
- `codex.api_request`
  - `cf_ray` (optional)
  - `attempt`
  - `duration_ms`
  - `http.response.status_code` (optional)
  - `error.message` (failures)
- `codex.sse_event`
  - `event.kind`
  - `duration_ms`
  - `error.message` (failures)
  - `input_token_count` (completion only)
  - `output_token_count` (completion only)
  - `cached_token_count` (completion only, optional)
  - `reasoning_token_count` (completion only, optional)
  - `tool_token_count` (completion only)
- `codex.user_prompt`
  - `prompt_length`
  - `prompt` (redacted unless `log_user_prompt = true`)
- `codex.tool_decision`
  - `tool_name`
  - `call_id`
  - `decision` (`approved`, `approved_for_session`, `denied`, or `abort`)
  - `source` (`config` or `user`)
- `codex.tool_result`
  - `tool_name`
  - `call_id`
  - `arguments`
  - `duration_ms` (execution time for the tool)
  - `success` (`"true"` or `"false"`)
  - `output`
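For intuition, the catalog above maps onto a tagged enum along these lines (a minimal sketch with illustrative names, not the actual `codex_otel` types):

```rust
// Illustrative sketch only; the real event types live in the codex_otel crate.
enum OtelLogEvent {
    ApiRequest {
        attempt: u64,
        duration_ms: u64,
        cf_ray: Option<String>,        // optional
        http_status: Option<u16>,      // optional
        error_message: Option<String>, // failures only
    },
    SseEvent {
        kind: String,
        duration_ms: u64,
        error_message: Option<String>, // failures only
        // Token counts are only attached to completion events.
        input_token_count: Option<u64>,
        output_token_count: Option<u64>,
        cached_token_count: Option<u64>,
        reasoning_token_count: Option<u64>,
        tool_token_count: Option<u64>,
    },
    UserPrompt {
        prompt_length: usize,
        prompt: Option<String>, // None unless log_user_prompt = true
    },
    ToolDecision {
        tool_name: String,
        call_id: String,
        decision: String, // approved | approved_for_session | denied | abort
        source: String,   // config | user
    },
    ToolResult {
        tool_name: String,
        call_id: String,
        arguments: String,
        duration_ms: u64,
        success: bool,
        output: String,
    },
}
```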
### Choosing an exporter
Set `otel.exporter` to control where events go:
- `none` – leaves instrumentation active but skips exporting. This is the
  default.
- `otlp-http` – posts OTLP log records to an OTLP/HTTP collector. Specify the
  endpoint, protocol, and headers your collector expects:
```toml
[otel]
exporter = { otlp-http = {
endpoint = "https://otel.example.com/v1/logs",
protocol = "binary",
headers = { "x-otlp-api-key" = "${OTLP_TOKEN}" }
}}
```
- `otlp-grpc` – streams OTLP log records over gRPC. Provide the endpoint and
  any metadata headers:
```toml
[otel]
exporter = { otlp-grpc = {
endpoint = "https://otel.example.com:4317",
headers = { "x-otlp-meta" = "abc123" }
}}
```
If the exporter is `none` nothing is written anywhere; otherwise you must run
or point to your own collector. All exporters run on a background batch worker
that is flushed on shutdown.
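This is also why `exporter` accepts either the bare string `"none"` or an inline table: the setting behaves like an externally tagged enum. A minimal sketch, assuming serde and field shapes inferred from the examples above (not the actual config type):

```rust
use serde::Deserialize;
use std::collections::HashMap;

// Sketch only: "none" deserializes from the bare string, the others from
// { otlp-http = { ... } } / { otlp-grpc = { ... } } tables.
#[derive(Deserialize)]
#[serde(rename_all = "kebab-case")]
enum OtelExporter {
    None,
    OtlpHttp {
        endpoint: String,
        protocol: String,
        #[serde(default)]
        headers: HashMap<String, String>,
    },
    OtlpGrpc {
        endpoint: String,
        #[serde(default)]
        headers: HashMap<String, String>,
    },
}
```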
If you build Codex from source the OTEL crate is still behind an `otel`
feature flag; the official prebuilt binaries ship with the feature enabled.
When the feature is disabled the telemetry hooks become no-ops so the CLI
continues to function without the extra dependencies.
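The no-op fallback is the standard Cargo feature-gate pattern; a minimal sketch with a hypothetical hook name:

```rust
#[cfg(feature = "otel")]
fn emit_otel_event(name: &str) {
    // Feature on: forward to the codex_otel exporter pipeline (elided here).
    println!("exporting otel event: {name}");
}

#[cfg(not(feature = "otel"))]
fn emit_otel_event(_name: &str) {
    // Feature off: the telemetry hook compiles to nothing and pulls in no deps.
}
```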
---------
Co-authored-by: Anton Panasenko <apanasenko@openai.com>
2025-09-29 19:30:55 +01:00
use std::fmt::Debug;
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
As stated in `codex-rs/README.md`:
Today, Codex CLI is written in TypeScript and requires Node.js 22+ to
run it. For a number of users, this runtime requirement inhibits
adoption: they would be better served by a standalone executable. As
maintainers, we want Codex to run efficiently in a wide range of
environments with minimal overhead. We also want to take advantage of
operating system-specific APIs to provide better sandboxing, where
possible.
To that end, we are moving forward with a Rust implementation of Codex
CLI contained in this folder, which has the following benefits:
- The CLI compiles to small, standalone, platform-specific binaries.
- Can make direct, native calls to
[seccomp](https://man7.org/linux/man-pages/man2/seccomp.2.html) and
[landlock](https://man7.org/linux/man-pages/man7/landlock.7.html) in
order to support sandboxing on Linux.
- No runtime garbage collection, resulting in lower memory consumption
and better, more predictable performance.
Currently, the Rust implementation is materially behind the TypeScript
implementation in functionality, so continue to use the TypeScript
implementation for the time being. We will publish native executables via
GitHub Releases as soon as we feel the Rust version is usable.
2025-04-24 13:31:40 -07:00
use std::path::PathBuf;
use std::sync::Arc;
2025-05-07 16:33:28 -07:00
use std::sync::atomic::AtomicU64;
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
2025-04-24 13:31:40 -07:00
2025-09-02 18:36:19 -07:00
use crate::AuthManager;
Review Mode (Core) (#3401)
## 📝 Review Mode -- Core
This PR introduces the Core implementation for Review mode:
- New op `Op::Review { prompt: String }`: spawns a child review task
with isolated context, a review‑specific system prompt, and a
`Config.review_model`.
- `EnteredReviewMode`: emitted when the child review session starts.
Every event from this point onwards reflects the review session.
- `ExitedReviewMode(Option<ReviewOutputEvent>)`: emitted when the review
finishes or is interrupted, with optional structured findings:
```json
{
"findings": [
{
"title": "<≤ 80 chars, imperative>",
"body": "<valid Markdown explaining *why* this is a problem; cite files/lines/functions>",
"confidence_score": <float 0.0-1.0>,
"priority": <int 0-3>,
"code_location": {
"absolute_file_path": "<file path>",
"line_range": {"start": <int>, "end": <int>}
}
}
],
"overall_correctness": "patch is correct" | "patch is incorrect",
"overall_explanation": "<1-3 sentence explanation justifying the overall_correctness verdict>",
"overall_confidence_score": <float 0.0-1.0>
}
```
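Deserialized into Rust, the schema above corresponds to types roughly like the following (a sketch; the actual `ReviewOutputEvent` definition in the protocol crate may differ):

```rust
use serde::Deserialize;

// Field names follow the JSON schema above; types are a best-effort sketch.
#[derive(Deserialize)]
struct ReviewOutput {
    findings: Vec<Finding>,
    overall_correctness: String, // "patch is correct" | "patch is incorrect"
    overall_explanation: String,
    overall_confidence_score: f32,
}

#[derive(Deserialize)]
struct Finding {
    title: String,
    body: String,
    confidence_score: f32,
    priority: u8, // 0-3
    code_location: CodeLocation,
}

#[derive(Deserialize)]
struct CodeLocation {
    absolute_file_path: String,
    line_range: LineRange,
}

#[derive(Deserialize)]
struct LineRange {
    start: u32,
    end: u32,
}
```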
## Questions
### Why separate out its own message history?
We want the review thread to match the training of our review models as
much as possible -- that means using a custom prompt, removing user
instructions, and starting a clean chat history.
We also want to make sure the review thread doesn't leak into the parent
thread.
### Why do this as a mode, vs. sub-agents?
1. We want review to be a synchronous task, so it's fine for now to do a
bespoke implementation.
2. We're still unclear about the final structure for sub-agents. We'd
prefer to land this quickly and then refactor into sub-agents without
rushing that implementation.
2025-09-12 16:25:10 -07:00
use crate::client_common::REVIEW_PROMPT;
2025-09-24 10:27:35 -07:00
use crate::function_tool::FunctionCallError;
2025-10-20 16:23:26 -07:00
use crate::mcp::auth::McpAuthStatusEntry;
2025-10-23 22:54:45 -07:00
use crate::mcp_connection_manager::DEFAULT_STARTUP_TIMEOUT;
2025-10-20 20:57:37 +01:00
use crate::parse_command::parse_command;
2025-10-22 10:14:50 -07:00
use crate::parse_turn_item;
2025-10-23 08:47:10 -07:00
use crate::response_processing::process_items;
2025-09-16 18:43:32 -07:00
use crate::review_format::format_review_findings_block;
OpenTelemetry events (#2103)
2025-09-29 19:30:55 +01:00
use crate::terminal;
2025-09-23 07:25:46 -07:00
use crate::user_notification::UserNotifier;
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
2025-04-24 13:31:40 -07:00
use async_channel::Receiver;
use async_channel::Sender;
fix: ensure apply_patch resolves relative paths against workdir or project cwd (#810)
https://github.com/openai/codex/pull/800 kicked off some work to be more
disciplined about honoring the `cwd` param passed in rather than
assuming `std::env::current_dir()` as the `cwd`. As part of this, we
need to ensure `apply_patch` calls honor the appropriate `cwd` as well,
which is significant if the paths in the `apply_patch` arg are not
absolute paths themselves. In that case:
- The `apply_patch` function call can contain an optional `workdir` param, so (see the sketch after this list):
  - If specified and an absolute path, it should be used to resolve relative paths.
  - If specified and a relative path, it should be resolved against `Config.cwd`, and then any relative paths resolved against the result.
  - If `workdir` is not specified on the function call, relative paths should be resolved against `Config.cwd`.
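Those rules reduce to picking a base directory and joining paths against it. A minimal sketch with a hypothetical helper name, leaning on the fact that `Path::join` returns its argument unchanged when that argument is absolute:

```rust
use std::path::{Path, PathBuf};

/// Resolve a path from an apply_patch call against the effective cwd.
fn resolve_patch_path(path: &Path, workdir: Option<&Path>, config_cwd: &Path) -> PathBuf {
    let base = match workdir {
        // join() is a no-op for an absolute `wd`, covering both workdir cases.
        Some(wd) => config_cwd.join(wd),
        None => config_cwd.to_path_buf(),
    };
    // Likewise, an already-absolute patch path passes through unchanged.
    base.join(path)
}
```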
Note that we had a similar issue in the TypeScript CLI that was fixed in
https://github.com/openai/codex/pull/556.
As part of the fix, this PR introduces `ApplyPatchAction` so clients can
deal with that instead of the raw `HashMap<PathBuf,
ApplyPatchFileChange>`. This enables us to enforce, by construction,
that all paths contained in the `ApplyPatchAction` are absolute paths.
2025-05-04 12:32:51 -07:00
use codex_apply_patch::ApplyPatchAction;
fix: remove mcp-types from app server protocol (#4537)
We continue the separation between `codex app-server` and `codex
mcp-server`.
In particular, we introduce a new crate, `codex-app-server-protocol`,
and migrate `codex-rs/protocol/src/mcp_protocol.rs` into it, renaming it
`codex-rs/app-server-protocol/src/protocol.rs`.
Because `ConversationId` was defined in `mcp_protocol.rs`, we move it
into its own file, `codex-rs/protocol/src/conversation_id.rs`, and
because it is referenced in a ton of places, we have to touch a lot of
files as part of this PR.
We also decide to get away from proper JSON-RPC 2.0 semantics, so we
also introduce `codex-rs/app-server-protocol/src/jsonrpc_lite.rs`, which
is basically the same `JSONRPCMessage` type defined in `mcp-types`
except with all of the `"jsonrpc": "2.0"` removed.
Getting rid of `"jsonrpc": "2.0"` makes our serialization logic
considerably simpler, as we can lean heavier on serde to serialize
directly into the wire format that we use now.
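A sketch of what such a `jsonrpc`-less message type can look like, assuming serde untagged variants (the actual `jsonrpc_lite.rs` shape may differ):

```rust
use serde::{Deserialize, Serialize};
use serde_json::Value;

// Like JSONRPCMessage minus the `"jsonrpc": "2.0"` field, so serde can
// serialize straight into the wire format.
#[derive(Serialize, Deserialize)]
#[serde(untagged)]
enum JsonRpcLiteMessage {
    Request { id: i64, method: String, params: Option<Value> },
    Notification { method: String, params: Option<Value> },
    Response { id: i64, result: Value },
    Error { id: i64, error: Value },
}
```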
2025-09-30 19:16:26 -07:00
use codex_protocol::ConversationId;
2025-10-20 13:34:44 -07:00
use codex_protocol::items::TurnItem;
2025-09-14 17:34:33 -07:00
use codex_protocol::protocol::ExitedReviewModeEvent;
2025-10-22 10:14:50 -07:00
use codex_protocol::protocol::ItemCompletedEvent;
use codex_protocol::protocol::ItemStartedEvent;
Review Mode (Core) (#3401)
2025-09-12 16:25:10 -07:00
use codex_protocol::protocol::ReviewRequest;
2025-09-10 10:17:24 -07:00
use codex_protocol::protocol::RolloutItem;
2025-10-02 13:06:21 -07:00
use codex_protocol::protocol::SessionSource;
2025-08-27 00:04:21 -07:00
use codex_protocol::protocol::TaskStartedEvent;
2025-08-17 21:40:31 -07:00
use codex_protocol::protocol::TurnAbortReason;
2025-09-11 11:08:51 -07:00
use codex_protocol::protocol::TurnContextItem;
2025-10-07 10:12:38 -07:00
use futures::future::BoxFuture;
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
2025-04-24 13:31:40 -07:00
use futures::prelude::*;
2025-10-07 10:12:38 -07:00
use futures::stream::FuturesOrdered;
fix: chat completions API now also passes tools along (#1167)
Prior to this PR, there were two big misses in `chat_completions.rs`:
1. The loop in `stream_chat_completions()` was only including items of
type `ResponseItem::Message` when building up the `"messages"` JSON for
the `POST` request to the `chat/completions` endpoint. This fixes things
by ensuring other variants (`FunctionCall`, `LocalShellCall`, and
`FunctionCallOutput`) are included, as well.
2. In `process_chat_sse()`, we were not recording tool calls and were
only emitting items of type
`ResponseEvent::OutputItemDone(ResponseItem::Message)` to the stream.
Now we introduce `FunctionCallState`, which is used to accumulate the
`delta`s of type `tool_calls`, so we can ultimately emit a
`ResponseItem::FunctionCall`, when appropriate.
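The accumulation itself is simple: buffer the streamed fragments until the call is complete. A minimal sketch with illustrative field and method names (not the exact ones in `chat_completions.rs`):

```rust
/// Accumulates streamed `tool_calls` deltas until a FunctionCall can be emitted.
#[derive(Default)]
struct FunctionCallState {
    name: Option<String>,
    call_id: Option<String>,
    arguments: String, // JSON argument fragments, concatenated in arrival order
}

impl FunctionCallState {
    fn apply_delta(&mut self, name: Option<&str>, call_id: Option<&str>, args: &str) {
        if let Some(n) = name {
            self.name.get_or_insert_with(|| n.to_string());
        }
        if let Some(id) = call_id {
            self.call_id.get_or_insert_with(|| id.to_string());
        }
        self.arguments.push_str(args);
    }
}
```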
While function calling now appears to work for chat completions with my
local testing, I believe that there are still edge cases that are not
covered and that this codepath would benefit from a battery of
integration tests. (As part of that further cleanup, we should also work
to support streaming responses in the UI.)
The other important part of this PR is some cleanup in
`core/src/codex.rs`. In particular, it was hard to reason about how
`run_task()` was building up the list of messages to include in a
request across the various cases:
- Responses API
- Chat Completions API
- Responses API used in concert with ZDR
I like to think things are a bit cleaner now where:
- `zdr_transcript` (if present) contains all messages in the history of
the conversation, which includes function call outputs that have not
been sent back to the model yet
- `pending_input` includes any messages the user has submitted while the
turn is in flight that need to be injected as part of the next `POST` to
the model
- `input_for_next_turn` includes the tool call outputs that have not
been sent back to the model yet
2025-06-02 13:47:51 -07:00
use mcp_types::CallToolResult;
2025-10-16 22:05:15 -07:00
use mcp_types::ListResourceTemplatesRequestParams;
use mcp_types::ListResourceTemplatesResult;
use mcp_types::ListResourcesRequestParams;
use mcp_types::ListResourcesResult;
use mcp_types::ReadResourceRequestParams;
use mcp_types::ReadResourceResult;
feat: configurable notifications in the Rust CLI (#793)
With this change, you can specify a program that will be executed to get
notified about events generated by Codex. The notification info will be
packaged as a JSON object. The supported notification types are defined
by the `UserNotification` enum introduced in this PR. Initially, it
contains only one variant, `AgentTurnComplete`:
```rust
pub(crate) enum UserNotification {
    #[serde(rename_all = "kebab-case")]
    AgentTurnComplete {
        turn_id: String,
        /// Messages that the user sent to the agent to initiate the turn.
        input_messages: Vec<String>,
        /// The last message sent by the assistant in the turn.
        last_assistant_message: Option<String>,
    },
}
```
This is intended to support the common case when a "turn" ends, which
often means it is now your chance to give Codex further instructions.
For example, I have the following in my `~/.codex/config.toml`:
```toml
notify = ["python3", "/Users/mbolin/.codex/notify.py"]
```
I created my own custom notifier script that calls out to
[terminal-notifier](https://github.com/julienXX/terminal-notifier) to
show a desktop push notification on macOS. Contents of `notify.py`:
```python
#!/usr/bin/env python3
import json
import subprocess
import sys

def main() -> int:
    if len(sys.argv) != 2:
        print("Usage: notify.py <NOTIFICATION_JSON>")
        return 1

    try:
        notification = json.loads(sys.argv[1])
    except json.JSONDecodeError:
        return 1

    match notification_type := notification.get("type"):
        case "agent-turn-complete":
            assistant_message = notification.get("last-assistant-message")
            if assistant_message:
                title = f"Codex: {assistant_message}"
            else:
                title = "Codex: Turn Complete!"
            input_messages = notification.get("input_messages", [])
            message = " ".join(input_messages)
            title += message
        case _:
            print(f"not sending a push notification for: {notification_type}")
            return 0

    subprocess.check_output(
        [
            "terminal-notifier",
            "-title",
            title,
            "-message",
            message,
            "-group",
            "codex",
            "-ignoreDnD",
            "-activate",
            "com.googlecode.iterm2",
        ]
    )
    return 0


if __name__ == "__main__":
    sys.exit(main())
```
For reference, here are related PRs that tried to add this functionality
to the TypeScript version of the Codex CLI:
* https://github.com/openai/codex/pull/160
* https://github.com/openai/codex/pull/498
2025-05-02 19:48:13 -07:00
use serde_json;
2025-09-23 13:59:16 -07:00
use serde_json::Value;
2025-09-18 18:21:52 +01:00
use tokio::sync::Mutex;
2025-05-07 08:37:48 -07:00
use tokio::sync::oneshot;
2025-10-17 11:52:57 -07:00
use tokio_util::sync::CancellationToken;
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
2025-04-24 13:31:40 -07:00
use tracing::debug;
2025-05-04 10:57:12 -07:00
use tracing::error;
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
2025-04-24 13:31:40 -07:00
use tracing::info;
use tracing::trace;
use tracing::warn;
exploration: create Session as part of Codex::spawn() (#2291)
Historically, `Codex::spawn()` would create the instance of `Codex` and
enforce, by construction, that `Op::ConfigureSession` was the first `Op`
submitted via `submit()`. Then over in `submission_loop()`, it would
handle the case for taking the parameters of `Op::ConfigureSession` and
turning it into a `Session`.
This approach has two challenges from a state management perspective:
https://github.com/openai/codex/blob/f968a1327ad39a7786759ea8f1d1c088fe41e91b/codex-rs/core/src/codex.rs#L718
- The local `sess` variable in `submission_loop()` has to be `mut` and
`Option<Arc<Session>>` because it is not invariant that a `Session` is
present for the lifetime of the loop, so there is a lot of logic to deal
with the case where `sess` is `None` (e.g., the `send_no_session_event`
function and all of its callsites).
- `submission_loop()` is written in such a way that
`Op::ConfigureSession` could be observed multiple times, but in
practice, it is only observed exactly once at the start of the loop.
In this PR, we try to simplify the state management by _removing_ the
`Op::ConfigureSession` enum variant and constructing the `Session` as
part of `Codex::spawn()` so that it can be passed to `submission_loop()`
as `Arc<Session>`. The original logic from the `Op::ConfigureSession`
has largely been moved to the new `Session::new()` constructor.
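In terms of shape, the change looks roughly like this (types and signatures are illustrative stand-ins, not the real ones):

```rust
use std::sync::Arc;

struct Session;    // stand-in for the real type
struct Submission; // stand-in for the real type

// Before: `sess` starts as None, so every op had to handle the missing case.
fn submission_loop_before(ops: Vec<Submission>) {
    let mut sess: Option<Arc<Session>> = None;
    for _op in ops {
        match &sess {
            Some(_sess) => { /* handle the op */ }
            None => {
                // Op::ConfigureSession created the session; anything else
                // went through send_no_session_event-style error handling.
                sess = Some(Arc::new(Session));
            }
        }
    }
}

// After: Codex::spawn() builds the Session up front and passes it in, so a
// session is present, by construction, for the lifetime of the loop.
fn submission_loop_after(sess: Arc<Session>, ops: Vec<Submission>) {
    for _op in ops {
        let _ = &sess; // always available; no Option, no mut
    }
}
```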
---
Incidentally, I also noticed that the handling of `Op::ConfigureSession`
can result in events being dispatched in addition to
`EventMsg::SessionConfigured`, as an `EventMsg::Error` is created for
every MCP initialization error, so it is important to preserve that
behavior:
https://github.com/openai/codex/blob/f968a1327ad39a7786759ea8f1d1c088fe41e91b/codex-rs/core/src/codex.rs#L901-L916
Though admittedly, I believe this does not play nice with #2264, as
these error messages will likely be dispatched before the client has a
chance to call `addConversationListener`, so we likely need to make it
so `newConversation` automatically creates the subscription, but we must
also guarantee that the "ack" from `newConversation` is returned before
any other conversation-related notifications are sent so the client
knows what `conversation_id` to match on.
2025-08-14 09:55:28 -07:00
use crate::ModelProviderInfo;
2025-07-28 09:51:22 -07:00
use crate::apply_patch::convert_apply_patch_to_protocol;
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
2025-04-24 13:31:40 -07:00
use crate::client::ModelClient;
2025-05-08 21:46:06 -07:00
use crate::client_common::Prompt;
use crate::client_common::ResponseEvent;
feat: support mcp_servers in config.toml (#829)
This adds initial support for MCP servers in the style of Claude Desktop
and Cursor. Note this PR is the bare minimum to get things working end
to end: all configured MCP servers are launched every time Codex is run,
there is no recovery for MCP servers that crash, etc.
(Also, I took some shortcuts to change some fields of `Session` to be
`pub(crate)`, which also means there are circular deps between
`codex.rs` and `mcp_tool_call.rs`, but I will clean that up in a
subsequent PR.)
`codex-rs/README.md` is updated as part of this PR to explain how to use
this feature. There is a bit of plumbing to route the new settings from
`Config` to the business logic in `codex.rs`. The most significant
chunks for new code are in `mcp_connection_manager.rs` (which defines
the `McpConnectionManager` struct) and `mcp_tool_call.rs`, which is
responsible for tool calls.
This PR also introduces new `McpToolCallBegin` and `McpToolCallEnd`
event types to the protocol, but does not add any handlers for them.
(See https://github.com/openai/codex/pull/836 for initial usage.)
To test, I added the following to my `~/.codex/config.toml`:
```toml
# Local build of https://github.com/hideya/mcp-server-weather-js
[mcp_servers.weather]
command = "/Users/mbolin/code/mcp-server-weather-js/dist/index.js"
args = []
```
And then I ran the following:
```
codex-rs$ cargo run --bin codex exec 'what is the weather in san francisco'
[2025-05-06T22:40:05] Task started: 1
[2025-05-06T22:40:18] Agent message: Here’s the latest National Weather Service forecast for San Francisco (downtown, near 37.77° N, 122.42° W):
This Afternoon (Tue):
• Sunny, high near 69 °F
• West-southwest wind around 12 mph
Tonight:
• Partly cloudy, low around 52 °F
• SW wind 7–10 mph
...
```
Note that Codex itself is not able to make network calls, so it would
not normally be able to get live weather information like this. However,
the weather MCP is [currently] not run under the Codex sandbox, so it is
able to hit `api.weather.gov` and fetch current weather information.
---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed
with [ReviewStack](https://reviewstack.dev/openai/codex/pull/829).
* #836
* __->__ #829
2025-05-06 15:47:59 -07:00
use crate::config::Config;
2025-10-20 16:23:26 -07:00
use crate::config_types::McpServerTransportConfig;
feat: introduce support for shell_environment_policy in config.toml (#1061)
To date, when handling `shell` and `local_shell` tool calls, we were
spawning new processes using the environment inherited from the Codex
process itself. This means that the sensitive `OPENAI_API_KEY` that
Codex needs to talk to OpenAI models was made available to everything
run by `shell` and `local_shell`. While there are cases where that might
be useful, it does not seem like a good default.
This PR introduces a `shell_environment_policy` config option to control the
`env` used with these tool calls. It is inevitably a bit complex so that
individual components of the policy can be overridden without having to
restate the entire thing.
Details are in the updated `README.md` in this PR, but here is the
relevant bit that explains the individual fields of
`shell_environment_policy`:
| Field | Type | Default | Description |
| --- | --- | --- | --- |
| `inherit` | string | `core` | Starting template for the environment:<br>`core` (`HOME`, `PATH`, `USER`, …), `all` (clone full parent env), or `none` (start empty). |
| `ignore_default_excludes` | boolean | `false` | When `false`, Codex removes any var whose **name** contains `KEY`, `SECRET`, or `TOKEN` (case-insensitive) before other rules run. |
| `exclude` | array<string> | `[]` | Case-insensitive glob patterns to drop after the default filter.<br>Examples: `"AWS_*"`, `"AZURE_*"`. |
| `set` | table<string,string> | `{}` | Explicit key/value overrides or additions – always win over inherited values. |
| `include_only` | array<string> | `[]` | If non-empty, a whitelist of patterns; only variables that match _one_ pattern survive the final step. (Generally used with `inherit = "all"`.) |
In particular, note that the default is `inherit = "core"`, so:
* if you have extra env variables that you want to inherit from the
parent process, use `inherit = "all"` and then specify `include_only`
* if you have extra env variables where you want to hardcode the values,
the default `inherit = "core"` will work fine, but then you need to
specify `set`
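As a concrete illustration of the default-exclude step from the table (the real logic, including the glob handling, lives in `core/src/exec_env.rs`; this sketch covers only the name-based filter):

```rust
use std::collections::HashMap;

/// Drop any var whose *name* contains KEY, SECRET, or TOKEN (case-insensitive).
fn apply_default_excludes(env: HashMap<String, String>) -> HashMap<String, String> {
    env.into_iter()
        .filter(|(name, _)| {
            let upper = name.to_uppercase();
            !(upper.contains("KEY") || upper.contains("SECRET") || upper.contains("TOKEN"))
        })
        .collect()
}
```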
This configuration is not battle-tested, so we will probably still have
to play with it a bit. `core/src/exec_env.rs` has the critical business
logic as well as unit tests.
Though if nothing else, previous to this change:
```
$ cargo run --bin codex -- debug seatbelt -- printenv OPENAI_API_KEY
# ...prints OPENAI_API_KEY...
```
But after this change it does not print anything (as desired).
One final thing to call out about this PR is that the
`configure_command!` macro we use in `core/src/exec.rs` has to do some
complex logic with respect to how it builds up the `env` for the process
being spawned under Landlock/seccomp. Specifically, doing
`cmd.env_clear()` followed by `cmd.envs(&$env_map)` (which is arguably
the most intuitive way to do it) caused the Landlock unit tests to fail
because the processes spawned by the unit tests started failing in
unexpected ways! If we forgo `env_clear()` in favor of updating env vars
one at a time, the tests still pass. The comment in the code talks about
this a bit, and while I would like to investigate this more, I need to
move on for the moment, but I do plan to come back to it to fully
understand what is going on. For example, this suggests that we might
not be able to spawn a C program that calls `env_clear()`, which would
be...weird. We may still have to fiddle with our Landlock config if that
is the case.
2025-05-22 09:51:19 -07:00
use crate::config_types::ShellEnvironmentPolicy;
2025-05-08 21:46:06 -07:00
use crate::conversation_history::ConversationHistory;
2025-08-14 14:51:13 -04:00
use crate::environment_context::EnvironmentContext;
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
2025-04-24 13:31:40 -07:00
use crate::error::CodexErr;
use crate::error::Result as CodexResult;
2025-10-03 11:17:39 +01:00
#[cfg(test)]
2025-08-11 11:52:05 -07:00
use crate::exec::StreamOutput;
2025-10-20 20:57:37 +01:00
// Removed: legacy executor wiring replaced by ToolOrchestrator flows.
// Legacy `normalize_exec_result` is no longer used after the orchestrator migration.
2025-10-08 14:37:57 -07:00
use crate::mcp::auth::compute_auth_statuses;
feat: support mcp_servers in config.toml (#829)
2025-05-06 15:47:59 -07:00
use crate::mcp_connection_manager::McpConnectionManager;
2025-08-15 09:56:05 -07:00
use crate::model_family::find_family_for_model;
2025-08-27 00:04:21 -07:00
use crate::openai_model_info::get_model_info;
2025-06-03 09:40:19 -07:00
use crate::project_doc::get_user_instructions;
2025-07-16 15:11:18 -07:00
use crate::protocol::AgentMessageDeltaEvent;
use crate::protocol::AgentReasoningDeltaEvent;
2025-08-05 01:56:13 -07:00
use crate::protocol::AgentReasoningRawContentDeltaEvent;
2025-08-12 17:37:28 -07:00
use crate::protocol::AgentReasoningSectionBreakEvent;
2025-05-13 20:44:42 -07:00
use crate::protocol::ApplyPatchApprovalRequestEvent;
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
2025-04-24 13:31:40 -07:00
use crate::protocol::AskForApproval;
2025-05-13 20:44:42 -07:00
use crate::protocol::BackgroundEventEvent;
use crate::protocol::ErrorEvent;
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
2025-04-24 13:31:40 -07:00
use crate::protocol::Event;
use crate::protocol::EventMsg;
2025-05-13 20:44:42 -07:00
use crate::protocol::ExecApprovalRequestEvent;
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
2025-04-24 13:31:40 -07:00
use crate::protocol::Op;
2025-09-23 15:56:34 -07:00
use crate::protocol::RateLimitSnapshot;
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
2025-04-24 13:31:40 -07:00
use crate::protocol::ReviewDecision;
Review Mode (Core) (#3401)
2025-09-12 16:25:10 -07:00
use crate::protocol::ReviewOutputEvent;
2025-10-24 17:23:44 -05:00
use crate::protocol::SandboxCommandAssessment;
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
2025-04-24 13:31:40 -07:00
use crate::protocol::SandboxPolicy;
2025-05-13 19:22:16 -07:00
use crate::protocol::SessionConfiguredEvent;
2025-08-21 01:15:24 -07:00
use crate::protocol::StreamErrorEvent;
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
2025-04-24 13:31:40 -07:00
use crate::protocol::Submission;
2025-09-20 21:26:16 -07:00
use crate::protocol::TokenCountEvent;
2025-09-12 13:07:10 -07:00
use crate::protocol::TokenUsage;
2025-08-04 08:57:04 -07:00
use crate::protocol::TurnDiffEvent;
2025-08-23 22:58:56 -07:00
use crate::protocol::WebSearchBeginEvent;
2025-05-07 13:49:15 -07:00
use crate::rollout::RolloutRecorder;
2025-09-08 14:54:47 -07:00
use crate::rollout::RolloutRecorderParams;
2025-07-25 11:45:23 -07:00
use crate::shell;
ref: full state refactor (#4174)
## Current State Observations
- `Session` currently holds many unrelated responsibilities (history,
approval queues, task handles, rollout recorder, shell discovery, token
tracking, etc.), making it hard to reason about ownership and lifetimes.
- The anonymous `State` struct inside `codex.rs` mixes session-long data
with turn-scoped queues and approval bookkeeping.
- Turn execution (`run_task`) relies on ad-hoc local variables that
should conceptually belong to a per-turn state object.
- External modules (`codex::compact`, tests) frequently poke the raw
`Session.state` mutex, which couples them to implementation details.
- Interrupts, approvals, and rollout persistence all have bespoke
cleanup paths, contributing to subtle bugs when a turn is aborted
mid-flight.
## Desired End State
- Keep a slim `Session` object that acts as the orchestrator and façade.
It should expose a focused API (submit, approvals, interrupts, event
emission) without storing unrelated fields directly.
- Introduce a `state` module that encapsulates all mutable data
structures:
- `SessionState`: session-persistent data (history, approved commands,
token/rate-limit info, maybe user preferences).
- `ActiveTurn`: metadata for the currently running turn (sub-id, task
kind, abort handle) and an `Arc<TurnState>`.
- `TurnState`: all turn-scoped pieces (pending inputs, approval waiters,
diff tracker, review history, auto-compact flags, last agent message,
outstanding tool call bookkeeping).
- Group long-lived helpers/managers into a dedicated `SessionServices`
struct so `Session` does not accumulate "random" fields.
- Provide clear, lock-safe APIs so other modules never touch raw
mutexes.
- Ensure every turn creates/drops a `TurnState` and that
interrupts/finishes delegate cleanup to it.
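Put together, the desired end state sketches out roughly as follows (field lists abridged and illustrative):

```rust
use std::sync::{Arc, Mutex};

/// Slim façade: orchestrates, but keeps mutable data in focused containers.
struct Session {
    state: Mutex<SessionState>,             // session-persistent data
    active_turn: Mutex<Option<ActiveTurn>>, // present only while a turn runs
    services: SessionServices,              // long-lived helpers/managers
}

struct SessionState { /* history, approved commands, token/rate-limit info */ }
struct SessionServices { /* rollout recorder, shell discovery, notifier, ... */ }
struct ActiveTurn {
    // sub-id, task kind, abort handle, plus the shared turn-scoped state:
    turn_state: Arc<TurnState>,
}
struct TurnState { /* pending input, approval waiters, diff tracker, ... */ }
```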
2025-09-25 11:16:06 +01:00
use crate::state::ActiveTurn;
use crate::state::SessionServices;
2025-10-18 17:43:08 -07:00
use crate::state::SessionState;
2025-10-14 16:17:00 +01:00
use crate::state::TaskKind;
2025-10-27 10:09:10 +00:00
use crate::tasks::GhostSnapshotTask;
2025-09-26 15:49:08 +02:00
use crate::tasks::ReviewTask;
2025-10-27 10:09:10 +00:00
use crate::tasks::SessionTask;
use crate::tasks::SessionTaskContext;
chore: refactor tool handling (#4510)
# Tool System Refactor
- Centralizes tool definitions and execution in `core/src/tools/*`:
specs (`spec.rs`), handlers (`handlers/*`), router (`router.rs`),
registry/dispatch (`registry.rs`), and shared context (`context.rs`).
One registry now builds the model-visible tool list and binds handlers.
- Router converts model responses to tool calls; Registry dispatches
with consistent telemetry via `codex-rs/otel` and unified error
handling. Function, Local Shell, MCP, and experimental `unified_exec`
all flow through this path; legacy shell aliases still work.
- Rationale: reduce per‑tool boilerplate, keep spec/handler in sync, and
make adding tools predictable and testable.
Example: `read_file`
- Spec: `core/src/tools/spec.rs` (see `create_read_file_tool`,
registered by `build_specs`).
- Handler: `core/src/tools/handlers/read_file.rs` (absolute `file_path`,
1‑indexed `offset`, `limit`, `L#: ` prefixes, safe truncation).
- E2E test: `core/tests/suite/read_file.rs` validates the tool returns
the requested lines.
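A sketch of the `L#: ` formatting contract described above (hypothetical helper; the safe-truncation step is elided):

```rust
/// Return `limit` lines starting at 1-indexed `offset`, each prefixed "L#: ".
fn format_lines(content: &str, offset: usize, limit: usize) -> String {
    content
        .lines()
        .enumerate()
        .skip(offset.saturating_sub(1))
        .take(limit)
        .map(|(i, line)| format!("L{}: {}", i + 1, line))
        .collect::<Vec<_>>()
        .join("\n")
}
```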
## Next steps:
- Decompose `handle_container_exec_with_params`
- Add parallel tool calls
2025-10-03 13:21:06 +01:00
use crate::tools::ToolRouter;
2025-10-05 17:10:49 +01:00
use crate::tools::context::SharedTurnDiffTracker;
use crate::tools::parallel::ToolCallRuntime;
2025-10-20 20:57:37 +01:00
use crate::tools::sandboxing::ApprovalStore;
use crate::tools::spec::ToolsConfig;
use crate::tools::spec::ToolsConfigParams;
2025-08-04 08:57:04 -07:00
use crate::turn_diff_tracker::TurnDiffTracker;
Unified execution (#3288)
## Unified PTY-Based Exec Tool
Note: this requires the following flag in the config:
`use_experimental_unified_exec_tool=true`
- Adds a PTY-backed interactive exec feature (“unified_exec”) with
session reuse via session_id, bounded output (128 KiB), and timeout
clamping (≤ 60 s); see the sketch after this list.
- Protocol: introduces ResponseItem::UnifiedExec { session_id,
arguments, timeout_ms }.
- Tools: exposes unified_exec as a function tool (Responses API);
excluded from the Chat Completions payload while still supported in
tool lists.
- Path handling: resolves commands via PATH (or explicit paths), with
UTF‑8/newline‑aware truncation (truncate_middle).
- Tests: cover command parsing, path resolution, session
persistence/cleanup, multi‑session isolation, timeouts, and truncation
behavior.
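A minimal sketch of the two bounds named above; the 128 KiB and 60 s
constants come from this description, while the function shapes and the
10 s default timeout are assumptions:
```rust
use std::time::Duration;

const MAX_OUTPUT_BYTES: usize = 128 * 1024; // 128 KiB output bound
const MAX_TIMEOUT: Duration = Duration::from_secs(60); // <= 60 s clamp

/// Clamp a model-supplied timeout to the allowed ceiling.
fn clamp_timeout(requested_ms: Option<u64>) -> Duration {
    // 10 s default is an assumed placeholder, not from the PR.
    let requested = Duration::from_millis(requested_ms.unwrap_or(10_000));
    requested.min(MAX_TIMEOUT)
}

/// Keep the head and tail of oversized output, cutting in the middle on
/// char boundaries so the result stays valid UTF-8.
fn truncate_middle(output: &str, max_bytes: usize) -> String {
    if output.len() <= max_bytes {
        return output.to_string();
    }
    let keep = max_bytes / 2;
    let mut head_end = keep;
    while !output.is_char_boundary(head_end) {
        head_end -= 1;
    }
    let mut tail_start = output.len() - keep;
    while !output.is_char_boundary(tail_start) {
        tail_start += 1;
    }
    format!("{}…[truncated]…{}", &output[..head_end], &output[tail_start..])
}
```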
2025-09-10 17:38:11 -07:00
use crate::unified_exec::UnifiedExecSessionManager;
2025-09-03 22:34:50 -07:00
use crate::user_instructions::UserInstructions;
feat: configurable notifications in the Rust CLI (#793)
With this change, you can specify a program that will be executed to get
notified about events generated by Codex. The notification info will be
packaged as a JSON object. The supported notification types are defined
by the `UserNotification` enum introduced in this PR. Initially, it
contains only one variant, `AgentTurnComplete`:
```rust
pub(crate) enum UserNotification {
    #[serde(rename_all = "kebab-case")]
    AgentTurnComplete {
        turn_id: String,
        /// Messages that the user sent to the agent to initiate the turn.
        input_messages: Vec<String>,
        /// The last message sent by the assistant in the turn.
        last_assistant_message: Option<String>,
    },
}
```
This is intended to support the common case when a "turn" ends, which
often means it is now your chance to give Codex further instructions.
For example, I have the following in my `~/.codex/config.toml`:
```toml
notify = ["python3", "/Users/mbolin/.codex/notify.py"]
```
I created my own custom notifier script that calls out to
[terminal-notifier](https://github.com/julienXX/terminal-notifier) to
show a desktop push notification on macOS. Contents of `notify.py`:
```python
#!/usr/bin/env python3
import json
import subprocess
import sys


def main() -> int:
    if len(sys.argv) != 2:
        print("Usage: notify.py <NOTIFICATION_JSON>")
        return 1
    try:
        notification = json.loads(sys.argv[1])
    except json.JSONDecodeError:
        return 1
    match notification_type := notification.get("type"):
        case "agent-turn-complete":
            assistant_message = notification.get("last-assistant-message")
            if assistant_message:
                title = f"Codex: {assistant_message}"
            else:
                title = "Codex: Turn Complete!"
            input_messages = notification.get("input_messages", [])
            message = " ".join(input_messages)
            title += message
        case _:
            print(f"not sending a push notification for: {notification_type}")
            return 0
    subprocess.check_output(
        [
            "terminal-notifier",
            "-title",
            title,
            "-message",
            message,
            "-group",
            "codex",
            "-ignoreDnD",
            "-activate",
            "com.googlecode.iterm2",
        ]
    )
    return 0


if __name__ == "__main__":
    sys.exit(main())
```
For reference, here are related PRs that tried to add this functionality
to the TypeScript version of the Codex CLI:
* https://github.com/openai/codex/pull/160
* https://github.com/openai/codex/pull/498
2025-05-02 19:48:13 -07:00
use crate::user_notification::UserNotification;
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
As stated in `codex-rs/README.md`:
Today, Codex CLI is written in TypeScript and requires Node.js 22+ to
run it. For a number of users, this runtime requirement inhibits
adoption: they would be better served by a standalone executable. As
maintainers, we want Codex to run efficiently in a wide range of
environments with minimal overhead. We also want to take advantage of
operating system-specific APIs to provide better sandboxing, where
possible.
To that end, we are moving forward with a Rust implementation of Codex
CLI contained in this folder, which has the following benefits:
- The CLI compiles to small, standalone, platform-specific binaries.
- Can make direct, native calls to
[seccomp](https://man7.org/linux/man-pages/man2/seccomp.2.html) and
[landlock](https://man7.org/linux/man-pages/man7/landlock.7.html) in
order to support sandboxing on Linux.
- No runtime garbage collection, resulting in lower memory consumption
and better, more predictable performance.
Currently, the Rust implementation is materially behind the TypeScript
implementation in functionality, so continue to use the TypeScript
implementation for the time being. We will publish native executables via
GitHub Releases as soon as we feel the Rust version is usable.
2025-04-24 13:31:40 -07:00
use crate::util::backoff;
2025-10-17 11:52:57 -07:00
use codex_async_utils::OrCancelExt;
OpenTelemetry events (#2103)
2025-09-29 19:30:55 +01:00
use codex_otel::otel_event_manager::OtelEventManager;
2025-08-18 11:50:17 -07:00
use codex_protocol::config_types::ReasoningEffort as ReasoningEffortConfig;
use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig;
2025-08-22 15:18:54 -07:00
use codex_protocol::models::ContentItem;
use codex_protocol::models::FunctionCallOutputPayload;
use codex_protocol::models::ResponseInputItem;
use codex_protocol::models::ResponseItem;
2025-09-10 10:17:24 -07:00
use codex_protocol::protocol::InitialHistory;
2025-10-20 13:34:44 -07:00
use codex_protocol::user_input::UserInput;
2025-10-27 10:09:10 +00:00
use codex_utils_readiness::Readiness;
use codex_utils_readiness::ReadinessFlag;
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
2025-04-24 13:31:40 -07:00
2025-09-18 13:55:53 -07:00
pub mod compact;
2025-09-14 09:23:31 -04:00
use self::compact::build_compacted_history;
use self::compact::collect_user_messages;
2025-09-12 13:07:10 -07:00
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
2025-04-24 13:31:40 -07:00
/// The high-level interface to the Codex system.
/// It operates as a queue pair where you send submissions and receive events.
pub struct Codex {
2025-05-07 16:33:28 -07:00
    next_id: AtomicU64,
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
2025-04-24 13:31:40 -07:00
    tx_sub: Sender<Submission>,
    rx_event: Receiver<Event>,
}
2025-07-27 20:01:35 -07:00
/// Wrapper returned by [`Codex::spawn`] containing the spawned [`Codex`] and
/// the unique conversation id for the session.
pub struct CodexSpawnOk {
    pub codex: Codex,
2025-09-07 20:22:25 -07:00
    pub conversation_id: ConversationId,
2025-07-27 20:01:35 -07:00
}
exploration: create Session as part of Codex::spawn() (#2291)
Historically, `Codex::spawn()` would create the instance of `Codex` and
enforce, by construction, that `Op::ConfigureSession` was the first `Op`
submitted via `submit()`. Then over in `submission_loop()`, it would
handle the case for taking the parameters of `Op::ConfigureSession` and
turning it into a `Session`.
This approach has two challenges from a state management perspective:
https://github.com/openai/codex/blob/f968a1327ad39a7786759ea8f1d1c088fe41e91b/codex-rs/core/src/codex.rs#L718
- The local `sess` variable in `submission_loop()` has to be `mut` and
`Option<Arc<Session>>` because it is not invariant that a `Session` is
present for the lifetime of the loop, so there is a lot of logic to deal
with the case where `sess` is `None` (e.g., the `send_no_session_event`
function and all of its callsites).
- `submission_loop()` is written in such a way that
`Op::ConfigureSession` could be observed multiple times, but in
practice, it is only observed exactly once at the start of the loop.
In this PR, we try to simplify the state management by _removing_ the
`Op::ConfigureSession` enum variant and constructing the `Session` as
part of `Codex::spawn()` so that it can be passed to `submission_loop()`
as `Arc<Session>`. The original logic from the `Op::ConfigureSession`
has largely been moved to the new `Session::new()` constructor.
---
Incidentally, I also noticed that the handling of `Op::ConfigureSession`
can result in events being dispatched in addition to
`EventMsg::SessionConfigured`, as an `EventMsg::Error` is created for
every MCP initialization error, so it is important to preserve that
behavior:
https://github.com/openai/codex/blob/f968a1327ad39a7786759ea8f1d1c088fe41e91b/codex-rs/core/src/codex.rs#L901-L916
Though admittedly, I believe this does not play nice with #2264, as
these error messages will likely be dispatched before the client has a
chance to call `addConversationListener`, so we likely need to make it
so `newConversation` automatically creates the subscription, but we must
also guarantee that the "ack" from `newConversation` is returned before
any other conversation-related notifications are sent so the client
knows what `conversation_id` to match on.
2025-08-14 09:55:28 -07:00
pub(crate) const INITIAL_SUBMIT_ID: &str = "";
2025-08-23 09:54:31 -07:00
pub(crate) const SUBMISSION_CHANNEL_CAPACITY: usize = 64;
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
2025-04-24 13:31:40 -07:00
impl Codex {
2025-07-27 20:01:35 -07:00
    /// Spawn a new [`Codex`] and initialize the session.
2025-08-22 13:10:11 -07:00
    pub async fn spawn(
        config: Config,
        auth_manager: Arc<AuthManager>,
2025-09-02 15:44:29 -07:00
        conversation_history: InitialHistory,
2025-10-02 13:06:21 -07:00
        session_source: SessionSource,
2025-08-22 13:10:11 -07:00
    ) -> CodexResult<CodexSpawnOk> {
2025-08-23 09:54:31 -07:00
        let (tx_sub, rx_sub) = async_channel::bounded(SUBMISSION_CHANNEL_CAPACITY);
2025-08-04 21:23:22 -07:00
        let (tx_event, rx_event) = async_channel::unbounded();
2025-05-10 17:52:59 -07:00
2025-07-22 09:42:22 -07:00
        let user_instructions = get_user_instructions(&config).await;
exploration: create Session as part of Codex::spawn() (#2291)
2025-08-14 09:55:28 -07:00
        let config = Arc::new(config);
2025-10-18 17:43:08 -07:00
        let session_configuration = SessionConfiguration {
2025-05-07 17:38:28 -07:00
            provider: config.model_provider.clone(),
2025-05-07 16:33:28 -07:00
            model: config.model.clone(),
feat: make reasoning effort/summaries configurable (#1199)
Prior to this PR, we always set `reasoning` when making a request
using the Responses API:
https://github.com/openai/codex/blob/d7245cbbc9d8ff5446da45e5951761103492476d/codex-rs/core/src/client.rs#L108-L111
Though if you tried to use the Rust CLI with `--model gpt-4.1`, this
would fail with:
```shell
"Unsupported parameter: 'reasoning.effort' is not supported with this model."
```
We take a cue from the TypeScript CLI, which does a check on the model
name:
https://github.com/openai/codex/blob/d7245cbbc9d8ff5446da45e5951761103492476d/codex-cli/src/utils/agent/agent-loop.ts#L786-L789
This PR does a similar check, though also adds support for the following
config options:
```
model_reasoning_effort = "low" | "medium" | "high" | "none"
model_reasoning_summary = "auto" | "concise" | "detailed" | "none"
```
This way, if you have a model whose name happens to start with `"o"` (or
`"codex"`?), you can set these to `"none"` to explicitly disable
reasoning, if necessary. (That said, it seems unlikely anyone would use
the Responses API with non-OpenAI models, but we provide an escape
hatch, anyway.)
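For example, a config that explicitly disables reasoning for such a
model (the model name here is hypothetical):
```toml
model = "my-o-like-model"  # hypothetical model whose name starts with "o"
model_reasoning_effort = "none"
model_reasoning_summary = "none"
```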
This PR also updates both the TUI and `codex exec` to show `reasoning
effort` and `reasoning summaries` in the header.
2025-06-02 16:01:34 -07:00
            model_reasoning_effort: config.model_reasoning_effort,
            model_reasoning_summary: config.model_reasoning_summary,
2025-07-22 09:42:22 -07:00
            user_instructions,
            base_instructions: config.base_instructions.clone(),
2025-05-07 16:33:28 -07:00
            approval_policy: config.approval_policy,
            sandbox_policy: config.sandbox_policy.clone(),
            cwd: config.cwd.clone(),
2025-10-18 17:43:08 -07:00
            original_config_do_not_use: Arc::clone(&config),
2025-10-27 10:09:10 +00:00
            features: config.features.clone(),
2025-05-07 16:33:28 -07:00
        };
2025-07-21 21:01:56 -07:00
        // Generate a unique ID for the lifetime of this Codex session.
2025-10-18 17:43:08 -07:00
        let session = Session::new(
            session_configuration,
2025-08-22 13:10:11 -07:00
            config.clone(),
            auth_manager.clone(),
            tx_event.clone(),
2025-09-14 09:23:31 -04:00
            conversation_history,
2025-10-02 13:06:21 -07:00
            session_source,
2025-08-22 13:10:11 -07:00
        )
        .await
        .map_err(|e| {
            error!("Failed to create session: {e:#}");
            CodexErr::InternalAgentDied
        })?;
2025-09-07 20:22:25 -07:00
        let conversation_id = session.conversation_id;
chore: introduce ConversationManager as a clearinghouse for all conversations (#2240)
This PR does two things; after I got deep into the first one, I
started pulling on the thread that led to the second:
- Makes `ConversationManager` the place where all in-memory
conversations are created and stored. Previously, `MessageProcessor` in
the `codex-mcp-server` crate was doing this via its `session_map`, but
this is something that should be done in `codex-core`.
- It unwinds the `ctrl_c: tokio::sync::Notify` that was threaded
throughout our code. I think this made sense at one time, but now that
we handle Ctrl-C within the TUI and have a proper `Op::Interrupt` event,
I don't think this was quite right, so I removed it. For `codex exec`
and `codex proto`, we now use `tokio::signal::ctrl_c()` directly, but we
no longer make `Notify` a field of `Codex` or `CodexConversation`.
Changes of note:
- Adds the files `conversation_manager.rs` and `codex_conversation.rs`
to `codex-core`.
- `Codex` and `CodexSpawnOk` are no longer exported from `codex-core`:
other crates must use `CodexConversation` instead (which is created via
`ConversationManager`).
- `core/src/codex_wrapper.rs` has been deleted in favor of
`ConversationManager`.
- `ConversationManager::new_conversation()` returns `NewConversation`,
which is in line with the `new_conversation` tool we want to add to the
MCP server. Note `NewConversation` includes `SessionConfiguredEvent`, so
we eliminate checks in cases like `codex-rs/core/tests/client.rs` to
verify `SessionConfiguredEvent` is the first event because that is now
internal to `ConversationManager`.
- Quite a bit of code was deleted from
`codex-rs/mcp-server/src/message_processor.rs` since it no longer has to
manage multiple conversations itself: it goes through
`ConversationManager` instead.
- `core/tests/live_agent.rs` has been deleted: I had to update a bunch
of tests anyway, every test in it was ignored, and I don't think anyone
ever ran them, so at this point it was just technical debt.
- Removed `notify_on_sigint()` from `util.rs` (and in a follow-up, I
hope to refactor the blandly-named `util.rs` into more descriptive
files).
- In general, I started replacing local variables named `codex` as
`conversation`, where appropriate, though admittedly I didn't do it
through all the integration tests because that would have added a lot of
noise to this PR.
---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed
with [ReviewStack](https://reviewstack.dev/openai/codex/pull/2240).
* #2264
* #2263
* __->__ #2240
2025-08-13 13:38:18 -07:00
        // This task will run until Op::Shutdown is received.
2025-10-18 17:43:08 -07:00
        tokio::spawn(submission_loop(session, config, rx_sub));
2025-05-07 16:33:28 -07:00
        let codex = Codex {
            next_id: AtomicU64::new(0),
            tx_sub,
            rx_event,
        };
2025-09-07 20:22:25 -07:00
        Ok(CodexSpawnOk {
            codex,
            conversation_id,
        })
2025-05-07 16:33:28 -07:00
    }
    /// Submit the `op` wrapped in a `Submission` with a unique ID.
    pub async fn submit(&self, op: Op) -> CodexResult<String> {
        let id = self
            .next_id
            .fetch_add(1, std::sync::atomic::Ordering::SeqCst)
            .to_string();
        let sub = Submission { id: id.clone(), op };
        self.submit_with_id(sub).await?;
        Ok(id)
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
2025-04-24 13:31:40 -07:00
    }
2025-05-07 16:33:28 -07:00
    /// Use sparingly: prefer `submit()` so Codex is responsible for generating
    /// unique IDs for each submission.
    pub async fn submit_with_id(&self, sub: Submission) -> CodexResult<()> {
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
2025-04-24 13:31:40 -07:00
        self.tx_sub
            .send(sub)
            .await
2025-05-07 16:33:28 -07:00
            .map_err(|_| CodexErr::InternalAgentDied)?;
        Ok(())
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
2025-04-24 13:31:40 -07:00
    }
    pub async fn next_event(&self) -> CodexResult<Event> {
        let event = self
            .rx_event
            .recv()
            .await
            .map_err(|_| CodexErr::InternalAgentDied)?;
        Ok(event)
    }
}
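// Illustrative usage of the queue pair above (not from the original source;
// op construction and event handling are elided):
//
//     let CodexSpawnOk { codex, conversation_id } =
//         Codex::spawn(config, auth_manager, initial_history, session_source).await?;
//     let sub_id = codex.submit(op).await?;
//     while let Ok(event) = codex.next_event().await {
//         // react to events until shutdown
//     }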
/// Context for an initialized model agent
///
/// A session has at most 1 running task at a time, and can be interrupted by user input.
feat: support mcp_servers in config.toml (#829)
This adds initial support for MCP servers in the style of Claude Desktop
and Cursor. Note this PR is the bare minimum to get things working end
to end: all configured MCP servers are launched every time Codex is run,
there is no recovery for MCP servers that crash, etc.
(Also, I took some shortcuts to change some fields of `Session` to be
`pub(crate)`, which also means there are circular deps between
`codex.rs` and `mcp_tool_call.rs`, but I will clean that up in a
subsequent PR.)
`codex-rs/README.md` is updated as part of this PR to explain how to use
this feature. There is a bit of plumbing to route the new settings from
`Config` to the business logic in `codex.rs`. The most significant
chunks for new code are in `mcp_connection_manager.rs` (which defines
the `McpConnectionManager` struct) and `mcp_tool_call.rs`, which is
responsible for tool calls.
This PR also introduces new `McpToolCallBegin` and `McpToolCallEnd`
event types to the protocol, but does not add any handlers for them.
(See https://github.com/openai/codex/pull/836 for initial usage.)
To test, I added the following to my `~/.codex/config.toml`:
```toml
# Local build of https://github.com/hideya/mcp-server-weather-js
[mcp_servers.weather]
command = "/Users/mbolin/code/mcp-server-weather-js/dist/index.js"
args = []
```
And then I ran the following:
```
codex-rs$ cargo run --bin codex exec 'what is the weather in san francisco'
[2025-05-06T22:40:05] Task started: 1
[2025-05-06T22:40:18] Agent message: Here’s the latest National Weather Service forecast for San Francisco (downtown, near 37.77° N, 122.42° W):
This Afternoon (Tue):
• Sunny, high near 69 °F
• West-southwest wind around 12 mph
Tonight:
• Partly cloudy, low around 52 °F
• SW wind 7–10 mph
...
```
Note that Codex itself is not able to make network calls, so it would
not normally be able to get live weather information like this. However,
the weather MCP is [currently] not run under the Codex sandbox, so it is
able to hit `api.weather.gov` and fetch current weather information.
---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed
with [ReviewStack](https://reviewstack.dev/openai/codex/pull/829).
* #836
* __->__ #829
2025-05-06 15:47:59 -07:00
pub(crate) struct Session {
2025-09-07 20:22:25 -07:00
    conversation_id: ConversationId,
2025-08-13 22:53:54 -07:00
    tx_event: Sender<Event>,
ref: full state refactor (#4174)
2025-09-25 11:16:06 +01:00
    state: Mutex<SessionState>,
2025-09-26 15:49:08 +02:00
    pub(crate) active_turn: Mutex<Option<ActiveTurn>>,
chore: refactor tool handling (#4510)
2025-10-03 13:21:06 +01:00
    pub(crate) services: SessionServices,
2025-09-18 18:21:52 +01:00
    next_internal_sub_id: AtomicU64,
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
2025-04-24 13:31:40 -07:00
}
2025-08-15 09:40:02 -07:00
/// The context needed for a single turn of the conversation.
#[derive(Debug)]
pub(crate) struct TurnContext {
2025-10-21 08:04:16 -07:00
    pub(crate) sub_id: String,
2025-08-15 09:40:02 -07:00
    pub(crate) client: ModelClient,
    /// The session's current working directory. All relative paths provided by
    /// the model as well as sandbox policies are resolved against this path
    /// instead of `std::env::current_dir()`.
    pub(crate) cwd: PathBuf,
    pub(crate) base_instructions: Option<String>,
    pub(crate) user_instructions: Option<String>,
    pub(crate) approval_policy: AskForApproval,
    pub(crate) sandbox_policy: SandboxPolicy,
    pub(crate) shell_environment_policy: ShellEnvironmentPolicy,
    pub(crate) tools_config: ToolsConfig,
Review Mode (Core) (#3401)
## 📝 Review Mode -- Core
This PR introduces the Core implementation for Review mode:
- New op `Op::Review { prompt: String }`: spawns a child review task
with isolated context, a review‑specific system prompt, and a
`Config.review_model`.
- `EnteredReviewMode`: emitted when the child review session starts.
Every event from this point onwards reflects the review session.
- `ExitedReviewMode(Option<ReviewOutputEvent>)`: emitted when the review
finishes or is interrupted, with optional structured findings:
```json
{
  "findings": [
    {
      "title": "<≤ 80 chars, imperative>",
      "body": "<valid Markdown explaining *why* this is a problem; cite files/lines/functions>",
      "confidence_score": <float 0.0-1.0>,
      "priority": <int 0-3>,
      "code_location": {
        "absolute_file_path": "<file path>",
        "line_range": {"start": <int>, "end": <int>}
      }
    }
  ],
  "overall_correctness": "patch is correct" | "patch is incorrect",
  "overall_explanation": "<1-3 sentence explanation justifying the overall_correctness verdict>",
  "overall_confidence_score": <float 0.0-1.0>
}
```
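Kicking off a review then looks roughly like this (a sketch assuming a
spawned `codex` handle; only `Op::Review` itself is taken from this PR):
```rust
// Sketch only; event handling elided.
// let sub_id = codex.submit(Op::Review {
//     prompt: "Review the current diff".to_string(),
// }).await?;
// codex.next_event() then yields EnteredReviewMode, the review session's
// events, and finally ExitedReviewMode(Some(review_output)).
```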
## Questions
### Why separate out its own message history?
We want the review thread to match the training of our review models as
much as possible -- that means using a custom prompt, removing user
instructions, and starting a clean chat history.
We also want to make sure the review thread doesn't leak into the parent
thread.
### Why do this as a mode, vs. sub-agents?
1. We want review to be a synchronous task, so it's fine for now to do a
bespoke implementation.
2. We're still unclear about the final structure for sub-agents. We'd
prefer to land this quickly and then refactor into sub-agents without
rushing that implementation.
2025-09-12 16:25:10 -07:00
    pub(crate) is_review_mode: bool,
2025-09-23 13:59:16 -07:00
    pub(crate) final_output_json_schema: Option<Value>,
2025-10-20 20:57:37 +01:00
    pub(crate) codex_linux_sandbox_exe: Option<PathBuf>,
2025-10-27 10:09:10 +00:00
    pub(crate) tool_call_gate: Arc<ReadinessFlag>,
2025-08-15 09:40:02 -07:00
}
impl TurnContext {
chore: refactor tool handling (#4510)
2025-10-03 13:21:06 +01:00
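    /// Resolves an optional model-supplied path against this turn's `cwd`.
    /// `None` falls back to `cwd`; an absolute path is returned unchanged,
    /// since `Path::join` replaces the base when given an absolute path.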
    pub(crate) fn resolve_path(&self, path: Option<String>) -> PathBuf {
2025-08-15 09:40:02 -07:00
        path.as_ref()
            .map(PathBuf::from)
            .map_or_else(|| self.cwd.clone(), |p| self.cwd.join(p))
    }
}
2025-10-18 17:43:08 -07:00
#[derive(Clone)]
pub(crate) struct SessionConfiguration {
exploration: create Session as part of Codex::spawn() (#2291)
2025-08-14 09:55:28 -07:00
    /// Provider identifier ("openai", "openrouter", ...).
    provider: ModelProviderInfo,
    /// If not specified, the server will use its default model.
    model: String,
2025-09-12 12:06:33 -07:00
    model_reasoning_effort: Option<ReasoningEffortConfig>,
exploration: create Session as part of Codex::spawn() (#2291)
2025-08-14 09:55:28 -07:00
    model_reasoning_summary: ReasoningSummaryConfig,
    /// Model instructions that are appended to the base instructions.
    user_instructions: Option<String>,
    /// Base instructions override.
    base_instructions: Option<String>,
    /// When to escalate to the user for execution approval.
    approval_policy: AskForApproval,
    /// How to sandbox commands executed in the system.
    sandbox_policy: SandboxPolicy,
    /// Working directory that should be treated as the *root* of the
    /// session. All relative paths supplied by the model as well as the
    /// execution sandbox are resolved against this directory **instead**
    /// of the process-wide current working directory. CLI front-ends are
    /// expected to expand this to an absolute path before sending the
    /// `ConfigureSession` operation so that the business-logic layer can
    /// operate deterministically.
    cwd: PathBuf,
2025-10-18 17:43:08 -07:00
2025-10-27 10:09:10 +00:00
    /// Set of feature flags for this session.
    features: Features,
2025-10-18 17:43:08 -07:00
    // TODO(pakrym): Remove config from here
    original_config_do_not_use: Arc<Config>,
}
impl SessionConfiguration {
    pub(crate) fn apply(&self, updates: &SessionSettingsUpdate) -> Self {
        let mut next_configuration = self.clone();
        if let Some(model) = updates.model.clone() {
            next_configuration.model = model;
        }
        if let Some(effort) = updates.reasoning_effort {
            next_configuration.model_reasoning_effort = effort;
        }
        if let Some(summary) = updates.reasoning_summary {
            next_configuration.model_reasoning_summary = summary;
        }
        if let Some(approval_policy) = updates.approval_policy {
            next_configuration.approval_policy = approval_policy;
        }
        if let Some(sandbox_policy) = updates.sandbox_policy.clone() {
            next_configuration.sandbox_policy = sandbox_policy;
        }
        if let Some(cwd) = updates.cwd.clone() {
            next_configuration.cwd = cwd;
        }
        next_configuration
    }
}
#[derive(Default, Clone)]
pub(crate) struct SessionSettingsUpdate {
    pub(crate) cwd: Option<PathBuf>,
    pub(crate) approval_policy: Option<AskForApproval>,
    pub(crate) sandbox_policy: Option<SandboxPolicy>,
    pub(crate) model: Option<String>,
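    /// For the two double-`Option` fields below, the outer `Option` encodes
    /// whether to update at all, and the inner value is the new setting
    /// (which may itself be `None` to clear the override).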
    pub(crate) reasoning_effort: Option<Option<ReasoningEffortConfig>>,
    pub(crate) reasoning_summary: Option<ReasoningSummaryConfig>,
    pub(crate) final_output_json_schema: Option<Option<Value>>,
exploration: create Session as part of Codex::spawn() (#2291)
2025-08-14 09:55:28 -07:00
}
2025-05-04 10:57:12 -07:00
impl Session {
2025-10-18 17:43:08 -07:00
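    // Builds a per-turn context: applies the session's model/reasoning
    // overrides to a cloned per-turn config, constructs a fresh ModelClient
    // bound to this conversation, and derives the model-visible tools config.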
    fn make_turn_context(
        auth_manager: Option<Arc<AuthManager>>,
        otel_event_manager: &OtelEventManager,
        provider: ModelProviderInfo,
        session_configuration: &SessionConfiguration,
        conversation_id: ConversationId,
2025-10-21 08:04:16 -07:00
        sub_id: String,
2025-10-18 17:43:08 -07:00
    ) -> TurnContext {
        let config = session_configuration.original_config_do_not_use.clone();
        let model_family = find_family_for_model(&session_configuration.model)
            .unwrap_or_else(|| config.model_family.clone());
        let mut per_turn_config = (*config).clone();
        per_turn_config.model = session_configuration.model.clone();
        per_turn_config.model_family = model_family.clone();
        per_turn_config.model_reasoning_effort = session_configuration.model_reasoning_effort;
        per_turn_config.model_reasoning_summary = session_configuration.model_reasoning_summary;
        if let Some(model_info) = get_model_info(&model_family) {
            per_turn_config.model_context_window = Some(model_info.context_window);
        }
        let otel_event_manager = otel_event_manager.clone().with_model(
            session_configuration.model.as_str(),
            session_configuration.model.as_str(),
        );
        let client = ModelClient::new(
            Arc::new(per_turn_config),
            auth_manager,
            otel_event_manager,
            provider,
            session_configuration.model_reasoning_effort,
            session_configuration.model_reasoning_summary,
            conversation_id,
        );
        let tools_config = ToolsConfig::new(&ToolsConfigParams {
            model_family: &model_family,
            features: &config.features,
        });
        TurnContext {
2025-10-21 08:04:16 -07:00
            sub_id,
2025-10-18 17:43:08 -07:00
            client,
            cwd: session_configuration.cwd.clone(),
            base_instructions: session_configuration.base_instructions.clone(),
            user_instructions: session_configuration.user_instructions.clone(),
            approval_policy: session_configuration.approval_policy,
            sandbox_policy: session_configuration.sandbox_policy.clone(),
            shell_environment_policy: config.shell_environment_policy.clone(),
            tools_config,
            is_review_mode: false,
            final_output_json_schema: None,
2025-10-20 20:57:37 +01:00
            codex_linux_sandbox_exe: config.codex_linux_sandbox_exe.clone(),
2025-10-27 10:09:10 +00:00
            tool_call_gate: Arc::new(ReadinessFlag::new()),
2025-10-18 17:43:08 -07:00
        }
    }
exploration: create Session as part of Codex::spawn() (#2291)
2025-08-14 09:55:28 -07:00
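A minimal sketch of the shape this change moves toward (`Session`, `submission_loop`, and the channel types here are illustrative stand-ins, not the exact signatures in this file):
```rust
use std::sync::Arc;
use tokio::sync::mpsc;

struct Session;

async fn submission_loop(sess: Arc<Session>, mut rx: mpsc::Receiver<String>) {
    // `sess` is guaranteed to exist for the lifetime of the loop: no
    // `Option<Arc<Session>>`, no `send_no_session_event` fallback paths.
    while let Some(op) = rx.recv().await {
        let _ = (&sess, &op); // dispatch `op` against `sess` here
    }
}

fn spawn_codex() -> mpsc::Sender<String> {
    // Construct the Session up front, then hand it to the loop.
    let sess = Arc::new(Session);
    let (tx, rx) = mpsc::channel(32);
    tokio::spawn(submission_loop(sess, rx));
    tx
}
```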
    async fn new(
        session_configuration: SessionConfiguration,
        config: Arc<Config>,
        auth_manager: Arc<AuthManager>,
        tx_event: Sender<Event>,
        initial_history: InitialHistory,
        session_source: SessionSource,
    ) -> anyhow::Result<Arc<Self>> {
        debug!(
            "Configuring session: model={}; provider={:?}",
            session_configuration.model, session_configuration.provider
        );
        if !session_configuration.cwd.is_absolute() {
            return Err(anyhow::anyhow!(
                "cwd is not absolute: {:?}",
                session_configuration.cwd
            ));
        }

        let (conversation_id, rollout_params) = match &initial_history {
            InitialHistory::New | InitialHistory::Forked(_) => {
                let conversation_id = ConversationId::default();
                (
                    conversation_id,
                    RolloutRecorderParams::new(
                        conversation_id,
                        session_configuration.user_instructions.clone(),
                        session_source,
                    ),
                )
            }
            InitialHistory::Resumed(resumed_history) => (
                resumed_history.conversation_id,
                RolloutRecorderParams::resume(resumed_history.rollout_path.clone()),
            ),
        };
        // Error messages to dispatch after SessionConfigured is sent.
        let mut post_session_configured_error_events = Vec::<Event>::new();

        // Kick off independent async setup tasks in parallel to reduce startup latency.
        //
        // - initialize RolloutRecorder with new or resumed session info
        // - spin up MCP connection manager
        // - perform default shell discovery
        // - load history metadata
        // - compute MCP auth statuses
        let rollout_fut = RolloutRecorder::new(&config, rollout_params);

        let mcp_fut = McpConnectionManager::new(
            config.mcp_servers.clone(),
            config.mcp_oauth_credentials_store_mode,
        );

        let default_shell_fut = shell::default_user_shell();
        let history_meta_fut = crate::message_history::history_metadata(&config);
        let auth_statuses_fut = compute_auth_statuses(
            config.mcp_servers.iter(),
            config.mcp_oauth_credentials_store_mode,
        );

        // Join all independent futures.
        let (
            rollout_recorder,
            mcp_res,
            default_shell,
            (history_log_id, history_entry_count),
            auth_statuses,
        ) = tokio::join!(
            rollout_fut,
            mcp_fut,
            default_shell_fut,
            history_meta_fut,
            auth_statuses_fut
        );

        let rollout_recorder = rollout_recorder.map_err(|e| {
            error!("failed to initialize rollout recorder: {e:#}");
            anyhow::anyhow!("failed to initialize rollout recorder: {e:#}")
        })?;
        let rollout_path = rollout_recorder.rollout_path.clone();
        // Handle MCP manager result and record any startup failures.
        let (mcp_connection_manager, failed_clients) = match mcp_res {
            Ok((mgr, failures)) => (mgr, failures),
            Err(e) => {
                let message = format!("Failed to create MCP connection manager: {e:#}");
                error!("{message}");
                post_session_configured_error_events.push(Event {
                    id: INITIAL_SUBMIT_ID.to_owned(),
                    msg: EventMsg::Error(ErrorEvent { message }),
                });
                (McpConnectionManager::default(), Default::default())
            }
        };
        // Surface individual client start-up failures to the user.
        if !failed_clients.is_empty() {
            for (server_name, err) in failed_clients {
                let auth_entry = auth_statuses.get(&server_name);
                let display_message = mcp_init_error_display(&server_name, auth_entry, &err);
                warn!("MCP client for `{server_name}` failed to start: {err:#}");
                post_session_configured_error_events.push(Event {
                    id: INITIAL_SUBMIT_ID.to_owned(),
                    msg: EventMsg::Error(ErrorEvent {
                        message: display_message,
                    }),
                });
            }
        }
        let otel_event_manager = OtelEventManager::new(
            conversation_id,
            config.model.as_str(),
            config.model_family.slug.as_str(),
            auth_manager.auth().and_then(|a| a.get_account_id()),
            auth_manager.auth().and_then(|a| a.get_account_email()),
            auth_manager.auth().map(|a| a.mode),
            config.otel.log_user_prompt,
            terminal::user_agent(),
        );
        otel_event_manager.conversation_starts(
            config.model_provider.name.as_str(),
            config.model_reasoning_effort,
            config.model_reasoning_summary,
            config.model_context_window,
            config.model_max_output_tokens,
            config.model_auto_compact_token_limit,
            config.approval_policy,
            config.sandbox_policy.clone(),
            config.mcp_servers.keys().map(String::as_str).collect(),
            config.active_profile.clone(),
        );

        // Create the mutable state for the Session.
        let state = SessionState::new(session_configuration.clone());
ref: full state refactor (#4174)
## Current State Observations
- `Session` currently holds many unrelated responsibilities (history, approval queues, task handles, rollout recorder, shell discovery, token tracking, etc.), making it hard to reason about ownership and lifetimes.
- The anonymous `State` struct inside `codex.rs` mixes session-long data with turn-scoped queues and approval bookkeeping.
- Turn execution (`run_task`) relies on ad-hoc local variables that conceptually belong to a per-turn state object.
- External modules (`codex::compact`, tests) frequently poke the raw `Session.state` mutex, which couples them to implementation details.
- Interrupts, approvals, and rollout persistence all have bespoke cleanup paths, contributing to subtle bugs when a turn is aborted mid-flight.
## Desired End State
- Keep a slim `Session` object that acts as the orchestrator and façade. It should expose a focused API (submit, approvals, interrupts, event emission) without storing unrelated fields directly.
- Introduce a `state` module that encapsulates all mutable data structures (see the sketch after this list):
  - `SessionState`: session-persistent data (history, approved commands, token/rate-limit info, maybe user preferences).
  - `ActiveTurn`: metadata for the currently running turn (sub-id, task kind, abort handle) and an `Arc<TurnState>`.
  - `TurnState`: all turn-scoped pieces (pending inputs, approval waiters, diff tracker, review history, auto-compact flags, last agent message, outstanding tool-call bookkeeping).
- Group long-lived helpers/managers into a dedicated `SessionServices` struct so `Session` does not accumulate "random" fields.
- Provide clear, lock-safe APIs so other modules never touch raw mutexes.
- Ensure every turn creates/drops a `TurnState` and that interrupts/finishes delegate cleanup to it.
2025-09-25 11:16:06 +01:00
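A minimal sketch of that decomposition, with illustrative placeholder fields (the real structs in this crate carry considerably more):
```rust
use std::sync::Arc;
use tokio::sync::Mutex;

/// Session-persistent data: history, approved commands, token info.
struct SessionState { /* ... */ }

/// Turn-scoped pieces: pending inputs, approval waiters, diff tracker.
struct TurnState { /* ... */ }

/// Metadata for the currently running turn plus its shared state.
struct ActiveTurn {
    sub_id: String,
    turn: Arc<TurnState>,
}

/// Long-lived helpers/managers: MCP manager, notifier, rollout recorder.
struct SessionServices { /* ... */ }

/// The slim orchestrator: mutable data behind focused locks, services grouped.
struct Session {
    state: Mutex<SessionState>,
    active_turn: Mutex<Option<ActiveTurn>>,
    services: SessionServices,
}
```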
        let services = SessionServices {
            mcp_connection_manager,
Unified execution (#3288)
## Unified PTY-Based Exec Tool
Note: this requires the following config flag: `use_experimental_unified_exec_tool = true`
- Adds a PTY-backed interactive exec feature ("unified_exec") with session reuse via `session_id`, bounded output (128 KiB), and timeout clamping (≤ 60 s); a sketch of those two bounds follows this list.
- Protocol: introduces `ResponseItem::UnifiedExec { session_id, arguments, timeout_ms }`.
- Tools: exposes `unified_exec` as a function tool (Responses API); excluded from the Chat Completions payload while still supported in tool lists.
- Path handling: resolves commands via PATH (or explicit paths), with UTF-8/newline-aware truncation (`truncate_middle`).
- Tests: cover command parsing, path resolution, session persistence/cleanup, multi-session isolation, timeouts, and truncation behavior.
2025-09-10 17:38:11 -07:00
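A minimal sketch of the two bounds called out above; the constants come from the description, and the real implementation truncates in the middle with UTF-8/newline awareness via `truncate_middle` rather than this naive tail cut:
```rust
/// Output cap per the PR description: 128 KiB.
const MAX_OUTPUT_BYTES: usize = 128 * 1024;
/// Timeout clamp per the PR description: at most 60 s.
const MAX_TIMEOUT_MS: u64 = 60_000;

/// Clamp a requested timeout into the allowed range, defaulting to the max.
fn clamp_timeout_ms(requested: Option<u64>) -> u64 {
    requested.unwrap_or(MAX_TIMEOUT_MS).min(MAX_TIMEOUT_MS)
}

/// Naive bound on captured output; the real tool truncates in the middle
/// and respects UTF-8 boundaries.
fn bound_output(bytes: &[u8]) -> &[u8] {
    &bytes[..bytes.len().min(MAX_OUTPUT_BYTES)]
}
```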
            unified_exec_manager: UnifiedExecSessionManager::default(),
            notifier: UserNotifier::new(config.notify.clone()),
            rollout: Mutex::new(Some(rollout_recorder)),
            user_shell: default_shell,
            show_raw_agent_reasoning: config.show_raw_agent_reasoning,
            auth_manager: Arc::clone(&auth_manager),
            otel_event_manager,
            tool_approvals: Mutex::new(ApprovalStore::default()),
        };
        let sess = Arc::new(Session {
            conversation_id,
            tx_event: tx_event.clone(),
            state: Mutex::new(state),
            active_turn: Mutex::new(None),
            services,
            next_internal_sub_id: AtomicU64::new(0),
        });

        // Dispatch the SessionConfiguredEvent first and then report any errors.
        // If resuming, include converted initial messages in the payload so UIs can render them immediately.
        let initial_messages = initial_history.get_event_msgs();
        let events = std::iter::once(Event {
            id: INITIAL_SUBMIT_ID.to_owned(),
            msg: EventMsg::SessionConfigured(SessionConfiguredEvent {
                session_id: conversation_id,
                model: session_configuration.model.clone(),
                reasoning_effort: session_configuration.model_reasoning_effort,
                history_log_id,
                history_entry_count,
                initial_messages,
                rollout_path,
            }),
        })
        .chain(post_session_configured_error_events.into_iter());
        for event in events {
            sess.send_event_raw(event).await;
        }

        // `record_initial_history` can emit events, so only record after the
        // SessionConfiguredEvent above has been sent.
        sess.record_initial_history(initial_history).await;
        Ok(sess)
    }
chore: refactor tool handling (#4510)
# Tool System Refactor
- Centralizes tool definitions and execution in `core/src/tools/*`: specs (`spec.rs`), handlers (`handlers/*`), router (`router.rs`), registry/dispatch (`registry.rs`), and shared context (`context.rs`). One registry now builds the model-visible tool list and binds handlers; a sketch of that shape follows this list.
- The router converts model responses into tool calls; the registry dispatches them with consistent telemetry via `codex-rs/otel` and unified error handling. Function, Local Shell, MCP, and the experimental `unified_exec` all flow through this path; legacy shell aliases still work.
- Rationale: reduce per-tool boilerplate, keep spec and handler in sync, and make adding tools predictable and testable.
Example: `read_file`
- Spec: `core/src/tools/spec.rs` (see `create_read_file_tool`, registered by `build_specs`).
- Handler: `core/src/tools/handlers/read_file.rs` (absolute `file_path`, 1-indexed `offset`, `limit`, `L#: ` prefixes, safe truncation).
- E2E test: `core/tests/suite/read_file.rs` validates that the tool returns the requested lines.
## Next steps:
- Decompose `handle_container_exec_with_params`
- Add parallel tool calls
2025-10-03 13:21:06 +01:00
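A minimal sketch of the registry/dispatch shape described above (trait and type names are illustrative, not the crate's actual API):
```rust
use std::collections::HashMap;

/// One handler per tool; the spec shown to the model lives alongside it.
trait ToolHandler {
    fn name(&self) -> &'static str;
    fn handle(&self, arguments: &str) -> Result<String, String>;
}

/// The registry binds handlers and dispatches calls routed from model output.
struct ToolRegistry {
    handlers: HashMap<&'static str, Box<dyn ToolHandler>>,
}

impl ToolRegistry {
    fn register(&mut self, handler: Box<dyn ToolHandler>) {
        self.handlers.insert(handler.name(), handler);
    }

    fn dispatch(&self, tool_name: &str, arguments: &str) -> Result<String, String> {
        match self.handlers.get(tool_name) {
            Some(handler) => handler.handle(arguments),
            None => Err(format!("unknown tool: {tool_name}")),
        }
    }
}
```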
    pub(crate) fn get_tx_event(&self) -> Sender<Event> {
        self.tx_event.clone()
    }

    /// Ensure all rollout writes are durably flushed.
    pub(crate) async fn flush_rollout(&self) {
        let recorder = {
            let guard = self.services.rollout.lock().await;
            guard.clone()
        };
        if let Some(rec) = recorder
            && let Err(e) = rec.flush().await
        {
            warn!("failed to flush rollout recorder: {e}");
        }
    }

    fn next_internal_sub_id(&self) -> String {
        let id = self
            .next_internal_sub_id
            .fetch_add(1, std::sync::atomic::Ordering::SeqCst);
        format!("auto-compact-{id}")
    }
2025-10-18 17:43:08 -07:00

async fn record_initial_history(&self, conversation_history: InitialHistory) {
    let turn_context = self.new_turn(SessionSettingsUpdate::default()).await;

    match conversation_history {
        InitialHistory::New => {
            // Build and record initial items (user instructions + environment context).
            let items = self.build_initial_context(&turn_context);
            self.record_conversation_items(&turn_context, &items).await;
            // Ensure initial items are visible to immediate readers (e.g., tests, forks).
            self.flush_rollout().await;
        }
        InitialHistory::Resumed(_) | InitialHistory::Forked(_) => {
            let rollout_items = conversation_history.get_rollout_items();
            let persist = matches!(conversation_history, InitialHistory::Forked(_));

            // Always add response items to conversation history.
            let reconstructed_history =
                self.reconstruct_history_from_rollout(&turn_context, &rollout_items);
            if !reconstructed_history.is_empty() {
                self.record_into_history(&reconstructed_history).await;
            }

            // If persisting, persist all rollout items as-is (the recorder filters).
            if persist && !rollout_items.is_empty() {
                self.persist_rollout_items(&rollout_items).await;
            }

            // Flush after seeding history and any persisted rollout copy.
            self.flush_rollout().await;
        }
    }
}
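
// Note the asymmetry above: new conversations seed history from scratch, while
// resumed/forked conversations rebuild it from rollout items, and only forks
// re-persist those items (a resume reuses the existing rollout).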

pub(crate) async fn update_settings(&self, updates: SessionSettingsUpdate) {
    let mut state = self.state.lock().await;
    state.session_configuration = state.session_configuration.apply(&updates);
}

pub(crate) async fn new_turn(&self, updates: SessionSettingsUpdate) -> Arc<TurnContext> {
    let sub_id = self.next_internal_sub_id();
    self.new_turn_with_sub_id(sub_id, updates).await
}

pub(crate) async fn new_turn_with_sub_id(
    &self,
    sub_id: String,
    updates: SessionSettingsUpdate,
) -> Arc<TurnContext> {
    let session_configuration = {
        let mut state = self.state.lock().await;
        let session_configuration = state.session_configuration.clone().apply(&updates);
        state.session_configuration = session_configuration.clone();
        session_configuration
    };

    let mut turn_context: TurnContext = Self::make_turn_context(
        Some(Arc::clone(&self.services.auth_manager)),
        &self.services.otel_event_manager,
        session_configuration.provider.clone(),
        &session_configuration,
        self.conversation_id,
        sub_id,
    );
    if let Some(final_schema) = updates.final_output_json_schema {
        turn_context.final_output_json_schema = final_schema;
    }
    Arc::new(turn_context)
}
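
// Usage sketch: a default update leaves the session configuration unchanged
// but still mints a fresh internal sub-id and TurnContext:
//
//     let turn_context = sess.new_turn(SessionSettingsUpdate::default()).await;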

/// Builds an environment-context diff item when the context changed between
/// turns (ignoring shell-only differences); returns `None` when nothing
/// user-visible changed.
fn build_environment_update_item(
    &self,
    previous: Option<&Arc<TurnContext>>,
    next: &TurnContext,
) -> Option<ResponseItem> {
    let prev = previous?;
    let prev_context = EnvironmentContext::from(prev.as_ref());
    let next_context = EnvironmentContext::from(next);
    if prev_context.equals_except_shell(&next_context) {
        return None;
    }
    Some(ResponseItem::from(EnvironmentContext::diff(
        prev.as_ref(),
        next,
    )))
}
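
// Example: changing cwd or the approval policy between turns yields a diff
// item for the next prompt, while a shell-only difference is ignored and
// produces no item.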

/// Persist the event to rollout and send it to clients.
pub(crate) async fn send_event(&self, turn_context: &TurnContext, msg: EventMsg) {
    let event = Event {
        id: turn_context.sub_id.clone(),
        msg,
    };
    self.send_event_raw(event).await;
}

pub(crate) async fn send_event_raw(&self, event: Event) {
    // Persist the event into rollout (the recorder filters as needed).
    let rollout_items = vec![RolloutItem::EventMsg(event.msg.clone())];
    self.persist_rollout_items(&rollout_items).await;

    if let Err(e) = self.tx_event.send(event).await {
        error!("failed to send event: {e}");
    }
}
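
// Ordering note: the event is persisted to the rollout before it is forwarded
// on tx_event, so (modulo recorder filtering) a client never observes an event
// that is missing from the rollout.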

async fn emit_turn_item_started(&self, turn_context: &TurnContext, item: &TurnItem) {
    self.send_event(
        turn_context,
        EventMsg::ItemStarted(ItemStartedEvent {
            thread_id: self.conversation_id,
            turn_id: turn_context.sub_id.clone(),
            item: item.clone(),
        }),
    )
    .await;
}

async fn emit_turn_item_completed(
    &self,
    turn_context: &TurnContext,
    item: TurnItem,
    emit_raw_agent_reasoning: bool,
) {
    self.send_event(
        turn_context,
        EventMsg::ItemCompleted(ItemCompletedEvent {
            thread_id: self.conversation_id,
            turn_id: turn_context.sub_id.clone(),
            item: item.clone(),
        }),
    )
    .await;
    self.emit_turn_item_legacy_events(turn_context, &item, emit_raw_agent_reasoning)
        .await;
}

async fn emit_turn_item_started_completed(
    &self,
    turn_context: &TurnContext,
    item: TurnItem,
    emit_raw_agent_reasoning: bool,
) {
    self.emit_turn_item_started(turn_context, &item).await;
    self.emit_turn_item_completed(turn_context, item, emit_raw_agent_reasoning)
        .await;
}

async fn emit_turn_item_legacy_events(
    &self,
    turn_context: &TurnContext,
    item: &TurnItem,
    emit_raw_agent_reasoning: bool,
) {
    for event in item.as_legacy_events(emit_raw_agent_reasoning) {
        self.send_event(turn_context, event).await;
    }
}
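
// Lifecycle sketch: an item typically emits ItemStarted, then ItemCompleted,
// then any legacy events derived from it:
//
//     sess.emit_turn_item_started_completed(&turn_context, item, false).await;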

pub(crate) async fn assess_sandbox_command(
    &self,
    turn_context: &TurnContext,
    call_id: &str,
    command: &[String],
    failure_message: Option<&str>,
) -> Option<SandboxCommandAssessment> {
    let config = turn_context.client.config();
    let provider = turn_context.client.provider().clone();
    let auth_manager = Arc::clone(&self.services.auth_manager);
    let otel = self.services.otel_event_manager.clone();
    crate::sandboxing::assessment::assess_command(
        config,
        provider,
        auth_manager,
        &otel,
        self.conversation_id,
        call_id,
        command,
        &turn_context.sandbox_policy,
        &turn_context.cwd,
        failure_message,
    )
    .await
}
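
// Usage sketch (hypothetical values; `stderr` is illustrative): after a
// sandboxed command fails, ask for a risk assessment before surfacing the
// approval request:
//
//     let risk = sess
//         .assess_sandbox_command(&turn_context, &call_id, &command, Some(&stderr))
//         .await;
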
/// Emit an exec approval request event and await the user's decision.
///
/// The request is keyed by `sub_id`/`call_id` so matching responses are delivered
/// to the correct in-flight turn. If the task is aborted, this returns the
/// default `ReviewDecision` (`Denied`).
pub async fn request_command_approval(
    &self,
    turn_context: &TurnContext,
    call_id: String,
    command: Vec<String>,
    cwd: PathBuf,
    reason: Option<String>,
    risk: Option<SandboxCommandAssessment>,
) -> ReviewDecision {
    let sub_id = turn_context.sub_id.clone();
    // Add the tx_approve callback to the map *before* sending the request, so a
    // client that responds immediately still finds the pending approval entry
    // registered (this ordering fixed a race; see #3146).
    let (tx_approve, rx_approve) = oneshot::channel();
    let event_id = sub_id.clone();
    let prev_entry = {
        let mut active = self.active_turn.lock().await;
        match active.as_mut() {
            Some(at) => {
                let mut ts = at.turn_state.lock().await;
                ts.insert_pending_approval(sub_id, tx_approve)
            }
            None => None,
        }
    };
    if prev_entry.is_some() {
        warn!("Overwriting existing pending approval for sub_id: {event_id}");
    }
    let parsed_cmd = parse_command(&command);
    let event = EventMsg::ExecApprovalRequest(ExecApprovalRequestEvent {
        call_id,
        command,
        cwd,
        reason,
        risk,
        parsed_cmd,
    });
    self.send_event(turn_context, event).await;

    rx_approve.await.unwrap_or_default()
}
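
// Usage sketch (hypothetical handler; argument values are illustrative):
//
//     let decision = sess
//         .request_command_approval(
//             &turn_context,
//             call_id.clone(),
//             vec!["git".into(), "push".into()],
//             turn_context.cwd.clone(),
//             Some("requires network access".into()),
//             None, // no prior sandbox risk assessment
//         )
//         .await;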

/// Emit an apply-patch approval request event and return a receiver for the
/// user's decision; unlike `request_command_approval`, the caller awaits the
/// returned `oneshot::Receiver` itself.
pub async fn request_patch_approval(
    &self,
    turn_context: &TurnContext,
    call_id: String,
    action: &ApplyPatchAction,
    reason: Option<String>,
    grant_root: Option<PathBuf>,
) -> oneshot::Receiver<ReviewDecision> {
    let sub_id = turn_context.sub_id.clone();
    // Add the tx_approve callback to the map before sending the request; the
    // same #3146 race applies here as in request_command_approval.
    let (tx_approve, rx_approve) = oneshot::channel();
    let event_id = sub_id.clone();
    let prev_entry = {
        let mut active = self.active_turn.lock().await;
        match active.as_mut() {
            Some(at) => {
                let mut ts = at.turn_state.lock().await;
                ts.insert_pending_approval(sub_id, tx_approve)
            }
            None => None,
        }
    };
    if prev_entry.is_some() {
        warn!("Overwriting existing pending approval for sub_id: {event_id}");
    }
    let event = EventMsg::ApplyPatchApprovalRequest(ApplyPatchApprovalRequestEvent {
        call_id,
        changes: convert_apply_patch_to_protocol(action),
        reason,
        grant_root,
    });
    self.send_event(turn_context, event).await;

    rx_approve
}

pub async fn notify_approval(&self, sub_id: &str, decision: ReviewDecision) {
    let entry = {
        let mut active = self.active_turn.lock().await;
        match active.as_mut() {
            Some(at) => {
                let mut ts = at.turn_state.lock().await;
                ts.remove_pending_approval(sub_id)
            }
            None => None,
        }
    };
    match entry {
        Some(tx_approve) => {
            tx_approve.send(decision).ok();
        }
        None => {
            warn!("No pending approval found for sub_id: {sub_id}");
        }
    }
}
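
// Round trip, schematically: a client receives ExecApprovalRequest with
// id == sub_id, prompts the user, and then calls
//
//     sess.notify_approval(&sub_id, ReviewDecision::Approved).await;
//
// which completes the oneshot registered in request_command_approval.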

/// Records input items: always append to conversation history and
/// persist these response items to rollout.
pub(crate) async fn record_conversation_items(
    &self,
    turn_context: &TurnContext,
    items: &[ResponseItem],
) {
    self.record_into_history(items).await;
    self.persist_rollout_response_items(items).await;
    self.send_raw_response_items(turn_context, items).await;
}
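
// Recording fans out three ways: in-memory history, rollout persistence, and a
// RawResponseItem event per item for live observers.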

fn reconstruct_history_from_rollout(
    &self,
    turn_context: &TurnContext,
    rollout_items: &[RolloutItem],
) -> Vec<ResponseItem> {
    let mut history = ConversationHistory::new();
    for item in rollout_items {
        match item {
            RolloutItem::ResponseItem(response_item) => {
                history.record_items(std::iter::once(response_item));
            }
            RolloutItem::Compacted(compacted) => {
                // A compaction event replaces everything recorded so far with a
                // rebuilt history: initial context + the user messages seen so
                // far + the compacted summary message.
                let snapshot = history.get_history();
                let user_messages = collect_user_messages(&snapshot);
                let rebuilt = build_compacted_history(
                    self.build_initial_context(turn_context),
                    &user_messages,
                    &compacted.message,
                );
                history.replace(rebuilt);
            }
            _ => {}
        }
    }
    history.get_history()
}
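
// Replay sketch: given rollout items [R1, R2, Compacted(S), R3], the rebuilt
// history is build_compacted_history(initial_context, user_messages(R1, R2), S)
// with R3 then recorded on top.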

/// Append ResponseItems to the in-memory conversation history only.
async fn record_into_history(&self, items: &[ResponseItem]) {
    let mut state = self.state.lock().await;
    state.record_items(items.iter());
}

pub(crate) async fn replace_history(&self, items: Vec<ResponseItem>) {
    let mut state = self.state.lock().await;
    state.replace_history(items);
}

async fn persist_rollout_response_items(&self, items: &[ResponseItem]) {
    let rollout_items: Vec<RolloutItem> = items
        .iter()
        .cloned()
        .map(RolloutItem::ResponseItem)
        .collect();
    self.persist_rollout_items(&rollout_items).await;
    }
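
    // Note: each `ResponseItem` above is wrapped as `RolloutItem::ResponseItem` so
    // the rollout recorder can persist it alongside the other rollout entry kinds.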

    async fn send_raw_response_items(&self, turn_context: &TurnContext, items: &[ResponseItem]) {
        for item in items {
            self.send_event(turn_context, EventMsg::RawResponseItem(item.clone()))
                .await;
        }
    }

    pub(crate) fn build_initial_context(&self, turn_context: &TurnContext) -> Vec<ResponseItem> {
        let mut items = Vec::<ResponseItem>::with_capacity(2);
        if let Some(user_instructions) = turn_context.user_instructions.as_deref() {
            items.push(UserInstructions::new(user_instructions.to_string()).into());
        }
        items.push(ResponseItem::from(EnvironmentContext::new(
            Some(turn_context.cwd.clone()),
            Some(turn_context.approval_policy),
            Some(turn_context.sandbox_policy.clone()),
            Some(self.user_shell().clone()),
        )));
        items
    }
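
    // Illustrative sketch (not part of the build): the context built above is at
    // most a two-item prefix for a turn's first request, assuming a hypothetical
    // `session` and `turn_context` in scope:
    //
    //     let items = session.build_initial_context(&turn_context);
    //     // -> [UserInstructions (only when configured),
    //     //     EnvironmentContext { cwd, approval policy, sandbox policy, shell }]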

    async fn persist_rollout_items(&self, items: &[RolloutItem]) {
        // Clone the recorder out of the lock so the mutex is not held across the
        // `record_items` await below.
        let recorder = {
            let guard = self.services.rollout.lock().await;
            guard.clone()
        };
        if let Some(rec) = recorder
            && let Err(e) = rec.record_items(items).await
        {
            error!("failed to record rollout items: {e:#}");
        }
    }

    pub(crate) async fn clone_history(&self) -> ConversationHistory {
        let state = self.state.lock().await;
        state.clone_history()
    }

    async fn update_token_usage_info(
        &self,
        turn_context: &TurnContext,
        token_usage: Option<&TokenUsage>,
    ) {
        {
            let mut state = self.state.lock().await;
            if let Some(token_usage) = token_usage {
                state.update_token_info_from_usage(
                    token_usage,
                    turn_context.client.get_model_context_window(),
                );
            }
        }
        self.send_token_count_event(turn_context).await;
    }

    async fn update_rate_limits(
        &self,
        turn_context: &TurnContext,
        new_rate_limits: RateLimitSnapshot,
    ) {
        {
            let mut state = self.state.lock().await;
            state.set_rate_limits(new_rate_limits);
        }
        self.send_token_count_event(turn_context).await;
    }

    async fn send_token_count_event(&self, turn_context: &TurnContext) {
        let (info, rate_limits) = {
            let state = self.state.lock().await;
            state.token_info_and_rate_limits()
        };
        let event = EventMsg::TokenCount(TokenCountEvent { info, rate_limits });
        self.send_event(turn_context, event).await;
    }
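
    // Illustrative sketch (not part of the build): the updaters above share one
    // shape: mutate `SessionState` inside a narrow lock scope, then emit the
    // TokenCount event after the guard is dropped. A hypothetical new updater
    // would follow suit:
    //
    //     async fn update_foo(&self, turn_context: &TurnContext, foo: Foo) {
    //         {
    //             let mut state = self.state.lock().await;
    //             state.set_foo(foo); // hypothetical setter on SessionState
    //         }
    //         self.send_token_count_event(turn_context).await; // lock already released
    //     }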

    async fn set_total_tokens_full(&self, turn_context: &TurnContext) {
        let context_window = turn_context.client.get_model_context_window();
        if let Some(context_window) = context_window {
            {
                let mut state = self.state.lock().await;
                state.set_token_usage_full(context_window);
            }
            self.send_token_count_event(turn_context).await;
        }
    }

    /// Record a user input item to conversation history and also persist a
    /// corresponding UserMessage EventMsg to rollout.
    async fn record_input_and_rollout_usermsg(
        &self,
        turn_context: &TurnContext,
        response_input: &ResponseInputItem,
    ) {
        let response_item: ResponseItem = response_input.clone().into();
        // Add to conversation history and persist response item to rollout.
        self.record_conversation_items(turn_context, std::slice::from_ref(&response_item))
            .await;
        // Derive user message events and persist only UserMessage to rollout.
        let turn_item = parse_turn_item(&response_item);
        if let Some(item @ TurnItem::UserMessage(_)) = turn_item {
            self.emit_turn_item_started_completed(turn_context, item, false)
                .await;
        }
    }

    /// Helper that emits a BackgroundEvent with the given message. This keeps
    /// the call sites terse so adding more diagnostics does not clutter the
    /// core agent logic.
    pub(crate) async fn notify_background_event(
        &self,
        turn_context: &TurnContext,
        message: impl Into<String>,
    ) {
        let event = EventMsg::BackgroundEvent(BackgroundEventEvent {
            message: message.into(),
        });
        self.send_event(turn_context, event).await;
    }

    async fn notify_stream_error(&self, turn_context: &TurnContext, message: impl Into<String>) {
        let event = EventMsg::StreamError(StreamErrorEvent {
            message: message.into(),
        });
        self.send_event(turn_context, event).await;
    }

    async fn maybe_start_ghost_snapshot(
        self: &Arc<Self>,
        turn_context: Arc<TurnContext>,
        cancellation_token: CancellationToken,
    ) {
        if turn_context.is_review_mode
            || !self
                .state
                .lock()
                .await
                .session_configuration
                .features
                .enabled(Feature::GhostCommit)
        {
            return;
        }
        let token = match turn_context.tool_call_gate.subscribe().await {
            Ok(token) => token,
            Err(err) => {
                warn!("failed to subscribe to ghost snapshot readiness: {err}");
                return;
            }
        };
        info!("spawning ghost snapshot task");
        let task = GhostSnapshotTask::new(token);
        Arc::new(task)
            .run(
                Arc::new(SessionTaskContext::new(self.clone())),
                turn_context.clone(),
                Vec::new(),
                cancellation_token,
            )
            .await;
    }
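
    // Design note: the snapshot is gated twice above. Review turns never snapshot,
    // and the GhostCommit feature must be enabled in the session configuration, so
    // the common path returns before subscribing to the tool-call gate.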

    /// Returns the input unchanged if there was no running task to inject it into.
    pub async fn inject_input(&self, input: Vec<UserInput>) -> Result<(), Vec<UserInput>> {
        let mut active = self.active_turn.lock().await;
        match active.as_mut() {
            Some(at) => {
                let mut ts = at.turn_state.lock().await;
                ts.push_pending_input(input.into());
                Ok(())
            }
            None => Err(input),
        }
    }
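
    // Illustrative sketch (not part of the build): a caller holding an
    // `Arc<Session>` (a hypothetical `session` here) queues input onto the active
    // turn, and gets the input back when nothing is running:
    //
    //     match session.inject_input(inputs).await {
    //         Ok(()) => { /* queued; the running turn will pick it up */ }
    //         Err(returned) => { /* no active task; start a new turn with `returned` */ }
    //     }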

    pub async fn get_pending_input(&self) -> Vec<ResponseInputItem> {
        let mut active = self.active_turn.lock().await;
        match active.as_mut() {
            Some(at) => {
                let mut ts = at.turn_state.lock().await;
                ts.take_pending_input()
            }
            None => Vec::with_capacity(0),
        }
    }
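
    // Illustrative pairing (not part of the build): the turn loop drains whatever
    // `inject_input` queued, converted into `ResponseInputItem`s:
    //
    //     let pending = sess.get_pending_input().await;
    //     // empty when no turn is active or nothing was injected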

    pub async fn list_resources(
        &self,
        server: &str,
        params: Option<ListResourcesRequestParams>,
    ) -> anyhow::Result<ListResourcesResult> {
        self.services
            .mcp_connection_manager
            .list_resources(server, params)
            .await
    }

    pub async fn list_resource_templates(
        &self,
        server: &str,
        params: Option<ListResourceTemplatesRequestParams>,
    ) -> anyhow::Result<ListResourceTemplatesResult> {
        self.services
            .mcp_connection_manager
            .list_resource_templates(server, params)
            .await
    }

    pub async fn read_resource(
        &self,
        server: &str,
        params: ReadResourceRequestParams,
    ) -> anyhow::Result<ReadResourceResult> {
        self.services
            .mcp_connection_manager
            .read_resource(server, params)
            .await
    }

    pub async fn call_tool(
        &self,
        server: &str,
        tool: &str,
        arguments: Option<serde_json::Value>,
    ) -> anyhow::Result<CallToolResult> {
        self.services
            .mcp_connection_manager
            .call_tool(server, tool, arguments)
            .await
    }
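
    // Illustrative sketch (not part of the build): invoking an MCP tool through the
    // session facade, with a hypothetical "docs" server exposing a "search" tool:
    //
    //     let result = session
    //         .call_tool("docs", "search", Some(serde_json::json!({"query": "rollout"})))
    //         .await?;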

    pub(crate) fn parse_mcp_tool_name(&self, tool_name: &str) -> Option<(String, String)> {
        self.services
            .mcp_connection_manager
            .parse_tool_name(tool_name)
    }
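
    // Illustrative sketch (not part of the build): recovering the (server, tool)
    // pair from a fully qualified tool name before dispatch; the qualified format
    // itself is an implementation detail of the connection manager:
    //
    //     if let Some((server, tool)) = session.parse_mcp_tool_name(&tool_name) {
    //         let result = session.call_tool(&server, &tool, arguments).await?;
    //     }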

    pub async fn interrupt_task(self: &Arc<Self>) {
        info!("interrupt received: abort current task, if any");
        self.abort_all_tasks(TurnAbortReason::Interrupted).await;
    }

    pub(crate) fn notifier(&self) -> &UserNotifier {
        &self.services.notifier
    }

    pub(crate) fn user_shell(&self) -> &shell::Shell {
        &self.services.user_shell
    }

    fn show_raw_agent_reasoning(&self) -> bool {
        self.services.show_raw_agent_reasoning
    }
}

async fn submission_loop(sess: Arc<Session>, config: Arc<Config>, rx_sub: Receiver<Submission>) {
    let mut previous_context: Option<Arc<TurnContext>> = None;
    // To break out of this loop, send Op::Shutdown.
    while let Ok(sub) = rx_sub.recv().await {
        debug!(?sub, "Submission");
        match sub.op.clone() {
            Op::Interrupt => {
                handlers::interrupt(&sess).await;
            }
            Op::OverrideTurnContext {
                cwd,
                approval_policy,
                sandbox_policy,
                model,
                effort,
                summary,
            } => {
                handlers::override_turn_context(
                    &sess,
                    SessionSettingsUpdate {
                        cwd,
                        approval_policy,
                        sandbox_policy,
                        model,
                        reasoning_effort: effort,
                        reasoning_summary: summary,
                        ..Default::default()
                    },
                )
                .await;
            }
            Op::UserInput { .. } | Op::UserTurn { .. } => {
                handlers::user_input_or_turn(&sess, sub.id.clone(), sub.op, &mut previous_context)
                    .await;
            }
            Op::ExecApproval { id, decision } => {
                handlers::exec_approval(&sess, id, decision).await;
            }
            Op::PatchApproval { id, decision } => {
                handlers::patch_approval(&sess, id, decision).await;
            }
feat: record messages from user in ~/.codex/history.jsonl (#939)
This is a large change to support a "history" feature like you would
expect in a shell like Bash.
History events are recorded in `$CODEX_HOME/history.jsonl`. Because it
is a JSONL file, appending a new entry is cheap (whereas the TypeScript
CLI uses `$CODEX_HOME/history.json`, which must be rewritten in full on
every new entry to remain valid JSON). Because multiple instances of
Codex CLI may write to `history.jsonl` at once, we use advisory file
locking when working with `history.jsonl` in
`codex-rs/core/src/message_history.rs`.
Because we believe history is a sufficiently useful feature, we enable
it by default. To provide some safety, though, we set the file
permissions of `history.jsonl` to `0o600` so that other users on the
system cannot read the user's history. We do not yet support a default
list of `SENSITIVE_PATTERNS` as the TypeScript CLI does:
https://github.com/openai/codex/blob/3fdf9df1335ac9501e3fb0e61715359145711e8b/codex-cli/src/utils/storage/command-history.ts#L10-L17
We are going to take a more conservative approach to this list in the
Rust CLI. For example, while `/\b[A-Za-z0-9-_]{20,}\b/` might exclude
sensitive information like API tokens, it would also exclude valuable
information such as references to Git commits.
As noted in the updated documentation, users can opt-out of history by
adding the following to `config.toml`:
```toml
[history]
persistence = "none"
```
Because `history.jsonl` could, in theory, be quite large, we take a[n
arguably overly pedantic] approach in reading history entries into
memory. Specifically, we start by telling the client the current number
of entries in the history file (`history_entry_count`) as well as the
inode (`history_log_id`) of `history.jsonl` (see the new fields on
`SessionConfiguredEvent`).
The client is responsible for keeping new entries in memory to create a
"local history," but if the user hits up enough times to go "past" the
end of local history, then the client should use the new
`GetHistoryEntryRequest` in the protocol to fetch older entries.
Specifically, it should pass the `history_log_id` it was given
originally and work backwards from `history_entry_count`. (It should
really fetch history in batches rather than one-at-a-time, but that is
something we can improve upon in subsequent PRs.)
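To make the paging contract concrete, here is a minimal sketch of the
client side. `HistoryPager`, `HistoryStep`, and their fields are
hypothetical illustration; only `Op::GetHistoryEntryRequest` and the
`SessionConfiguredEvent` fields (`history_log_id`,
`history_entry_count`) come from the protocol:

```rust
use codex_protocol::protocol::Op;

// Hypothetical client-side pager; only Op::GetHistoryEntryRequest is real.
enum HistoryStep {
    Local(usize), // index into this session's in-memory entries
    Fetch(Op),    // older entry must be fetched from core
    Exhausted,    // scrolled past the beginning of history
}

struct HistoryPager {
    history_log_id: u64,        // inode of history.jsonl, from SessionConfiguredEvent
    history_entry_count: usize, // on-disk entry count, from SessionConfiguredEvent
    local_len: usize,           // entries typed during this session
    cursor: usize,              // how many steps "up" the user has gone
}

impl HistoryPager {
    fn previous(&mut self) -> HistoryStep {
        self.cursor += 1;
        if self.cursor <= self.local_len {
            // Still inside local history; no protocol round-trip needed.
            HistoryStep::Local(self.local_len - self.cursor)
        } else {
            // Past local history: work backwards from history_entry_count.
            match self
                .history_entry_count
                .checked_sub(self.cursor - self.local_len)
            {
                Some(offset) => HistoryStep::Fetch(Op::GetHistoryEntryRequest {
                    offset,
                    log_id: self.history_log_id,
                }),
                None => HistoryStep::Exhausted,
            }
        }
    }
}
```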
The motivation behind this crazy scheme is that it is designed to defend
against:
* The `history.jsonl` being truncated during the session such that the
index into the history is no longer consistent with what had been read
up to that point. We do not yet have logic to enforce a `max_bytes` for
`history.jsonl`, but once we do, we will aspire to implement it in a way
that should result in a new inode for the file on most systems.
* New items from concurrent Codex CLI sessions appending to the history.
Because, in the absence of truncation, `history.jsonl` is an append-only
log, so long as the client reads backwards from `history_entry_count`,
it should always get a consistent view of history. (That said, it will
not be able to read _new_ commands from concurrent sessions, but perhaps
we will introduce a `/` command to reload latest history or something
down the road.)
Admittedly, my testing of this feature thus far has been fairly light. I
expect we will find bugs and introduce enhancements/fixes going forward.
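For illustration, here is a minimal sketch of the locked append path,
assuming the `fs2` crate for advisory locking; the real implementation
in `codex-rs/core/src/message_history.rs` may differ in detail:

```rust
use std::fs::OpenOptions;
use std::io::Write;
use std::os::unix::fs::OpenOptionsExt;

use fs2::FileExt; // advisory locking; an assumption for this sketch

fn append_history_line(path: &std::path::Path, line: &str) -> std::io::Result<()> {
    // Open append-only, creating the file with 0o600 so other users cannot read it.
    let mut file = OpenOptions::new()
        .create(true)
        .append(true)
        .mode(0o600)
        .open(path)?;
    // Exclusive advisory lock so concurrent Codex CLI sessions do not interleave writes.
    file.lock_exclusive()?;
    let result = writeln!(file, "{line}");
    let _ = file.unlock();
    result
}
```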
Op::AddToHistory { text } => {
    handlers::add_to_history(&sess, &config, text).await;
}
Op::GetHistoryEntryRequest { offset, log_id } => {
    handlers::get_history_entry_request(&sess, &config, sub.id.clone(), offset, log_id)
        .await;
}
Op::ListMcpTools => {
    handlers::list_mcp_tools(&sess, &config, sub.id.clone()).await;
}
Op::ListCustomPrompts => {
    handlers::list_custom_prompts(&sess, sub.id.clone()).await;
}
Op::Undo => {
    handlers::undo(&sess, sub.id.clone()).await;
}
Op::Compact => {
    handlers::compact(&sess, sub.id.clone()).await;
}
feature: Add "!cmd" user shell execution (#2471)
This change lets users run local shell commands directly from the TUI
by prefixing their input with `!` (e.g. `!ls`). Output is truncated to
keep the exec cell usable, and Ctrl-C cleanly interrupts long-running
commands (e.g. `!sleep 10000`).
**Summary of changes**
- Route Op::RunUserShellCommand through a dedicated UserShellCommandTask
(core/src/tasks/user_shell.rs), keeping the task logic out of codex.rs.
- Reuse the existing tool router: the task constructs a ToolCall for the
local_shell tool and relies on ShellHandler, so no manual MCP tool
lookup is required.
- Emit exec lifecycle events (ExecCommandBegin/ExecCommandEnd) so the
TUI can show command metadata, live output, and exit status.
**End-to-end flow**
**TUI handling**
1. ChatWidget::submit_user_message (TUI) intercepts messages starting
with !.
2. Non-empty commands dispatch Op::RunUserShellCommand { command };
empty commands surface a help hint.
3. No UserInput items are created, so nothing is enqueued for the model.
**Core submission loop**
4. The submission loop routes the op to handlers::run_user_shell_command
(core/src/codex.rs).
5. A fresh TurnContext is created and Session::spawn_user_shell_command
enqueues UserShellCommandTask.
**Task execution**
6. UserShellCommandTask::run emits TaskStartedEvent, formats the
command, and prepares a ToolCall targeting local_shell.
7. ToolCallRuntime::handle_tool_call dispatches to ShellHandler.
**Shell tool runtime**
8. ShellHandler::run_exec_like launches the process via the unified exec
runtime, honoring sandbox and shell policies, and emits
ExecCommandBegin/End.
9. Stdout/stderr are captured for the UI, but the task does not turn the
resulting ToolOutput into a model response.
**Completion**
10. After ExecCommandEnd, the task finishes without an assistant
message; the session marks it complete and the exec cell displays the
final output.
**Conversation context**
- The command and its output never enter the conversation history or the
model prompt; the flow is local-only.
- Only exec/task events are emitted for UI rendering.
**Demo video**
https://github.com/user-attachments/assets/fcd114b0-4304-4448-a367-a04c43e0b996
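The routing in steps 1-3 can be sketched as follows. This is a
hypothetical simplification: `Outcome` and `route_user_message` are
illustrative names, and the real `ChatWidget::submit_user_message`
carries more state; only `Op::RunUserShellCommand` is the real op.

```rust
// Hypothetical, simplified sketch of the TUI-side dispatch described above.
enum Outcome {
    RunShell(String),        // dispatch Op::RunUserShellCommand { command }
    ShowHint(&'static str),  // bare "!" surfaces a help hint
    ForwardToModel(String),  // everything else is a normal prompt
}

fn route_user_message(input: &str) -> Outcome {
    match input.strip_prefix('!') {
        // "!" alone: surface a help hint instead of dispatching anything.
        Some(rest) if rest.trim().is_empty() => Outcome::ShowHint("usage: !<command>"),
        // "!cmd": run locally; no UserInput items reach the model.
        Some(rest) => Outcome::RunShell(rest.to_string()),
        // No "!" prefix: enqueue for the model as usual.
        None => Outcome::ForwardToModel(input.to_string()),
    }
}
```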
Op::RunUserShellCommand { command } => {
    handlers::run_user_shell_command(
        &sess,
        sub.id.clone(),
        command,
        &mut previous_context,
    )
    .await;
}
Op::Shutdown => {
    if handlers::shutdown(&sess, sub.id.clone()).await {
        break;
    }
}
Review Mode (Core) (#3401)
## 📝 Review Mode -- Core
This PR introduces the Core implementation for Review mode:
- New op `Op::Review { prompt: String }` spawns a child review task
with isolated context, a review‑specific system prompt, and a
`Config.review_model`.
- `EnteredReviewMode`: emitted when the child review session starts.
Every event from this point onwards reflects the review session.
- `ExitedReviewMode(Option<ReviewOutputEvent>)`: emitted when the review
finishes or is interrupted, with optional structured findings:
```json
{
"findings": [
{
"title": "<≤ 80 chars, imperative>",
"body": "<valid Markdown explaining *why* this is a problem; cite files/lines/functions>",
"confidence_score": <float 0.0-1.0>,
"priority": <int 0-3>,
"code_location": {
"absolute_file_path": "<file path>",
"line_range": {"start": <int>, "end": <int>}
}
}
],
"overall_correctness": "patch is correct" | "patch is incorrect",
"overall_explanation": "<1-3 sentence explanation justifying the overall_correctness verdict>",
"overall_confidence_score": <float 0.0-1.0>
}
```
## Questions
### Why separate out its own message history?
We want the review thread to match the training of our review models as
much as possible -- that means using a custom prompt, removing user
instructions, and starting a clean chat history.
We also want to make sure the review thread doesn't leak into the parent
thread.
### Why do this as a mode, vs. sub-agents?
1. We want review to be a synchronous task, so it's fine for now to do a
bespoke implementation.
2. We're still unclear about the final structure for sub-agents. We'd
prefer to land this quickly and then refactor into sub-agents without
rushing that implementation.
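For illustration, the structured payload above maps onto serde types
along these lines. This is a hypothetical mirror of the JSON shown
earlier; the canonical definitions live in the codex-protocol crate and
may differ:

```rust
use serde::Deserialize;

// Hypothetical mirror of the structured review findings payload.
#[derive(Debug, Deserialize)]
struct ReviewOutputEvent {
    findings: Vec<Finding>,
    overall_correctness: String, // "patch is correct" | "patch is incorrect"
    overall_explanation: String,
    overall_confidence_score: f32,
}

#[derive(Debug, Deserialize)]
struct Finding {
    title: String,            // <= 80 chars, imperative
    body: String,             // Markdown explaining why this is a problem
    confidence_score: f32,    // 0.0-1.0
    priority: u8,             // 0-3
    code_location: CodeLocation,
}

#[derive(Debug, Deserialize)]
struct CodeLocation {
    absolute_file_path: String,
    line_range: LineRange,
}

#[derive(Debug, Deserialize)]
struct LineRange {
    start: u32,
    end: u32,
}
```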
Op::Review { review_request } => {
    handlers::review(&sess, &config, sub.id.clone(), review_request).await;
}
_ => {} // Ignore unknown ops; the enum is non_exhaustive to allow extensions.
}
}
debug!("Agent loop exited");
}
/// Operation handlers
mod handlers {
    use crate::codex::Session;
    use crate::codex::SessionSettingsUpdate;
    use crate::codex::TurnContext;
    use crate::codex::compact;
    use crate::codex::spawn_review_thread;
    use crate::config::Config;
    use crate::mcp::auth::compute_auth_statuses;
    use crate::tasks::CompactTask;
    use crate::tasks::RegularTask;
    use crate::tasks::UndoTask;
    use crate::tasks::UserShellCommandTask;
    use codex_protocol::custom_prompts::CustomPrompt;
    use codex_protocol::protocol::ErrorEvent;
    use codex_protocol::protocol::Event;
    use codex_protocol::protocol::EventMsg;
    use codex_protocol::protocol::ListCustomPromptsResponseEvent;
    use codex_protocol::protocol::Op;
    use codex_protocol::protocol::ReviewDecision;
    use codex_protocol::protocol::ReviewRequest;
    use codex_protocol::protocol::TurnAbortReason;
    use codex_protocol::user_input::UserInput;
    use std::sync::Arc;
    use tracing::info;
    use tracing::warn;
    pub async fn interrupt(sess: &Arc<Session>) {
        sess.interrupt_task().await;
    }

    pub async fn override_turn_context(sess: &Session, updates: SessionSettingsUpdate) {
        sess.update_settings(updates).await;
    }
    pub async fn user_input_or_turn(
        sess: &Arc<Session>,
        sub_id: String,
        op: Op,
        previous_context: &mut Option<Arc<TurnContext>>,
    ) {
        let (items, updates) = match op {
            Op::UserTurn {
                cwd,
                approval_policy,
                sandbox_policy,
                model,
                effort,
                summary,
                final_output_json_schema,
                items,
            } => (
                items,
                SessionSettingsUpdate {
                    cwd: Some(cwd),
                    approval_policy: Some(approval_policy),
                    sandbox_policy: Some(sandbox_policy),
                    model: Some(model),
                    reasoning_effort: Some(effort),
                    reasoning_summary: Some(summary),
                    final_output_json_schema: Some(final_output_json_schema),
                },
            ),
            Op::UserInput { items } => (items, SessionSettingsUpdate::default()),
            _ => unreachable!(),
        };
        let current_context = sess.new_turn_with_sub_id(sub_id, updates).await;
        current_context
            .client
            .get_otel_event_manager()
            .user_prompt(&items);
        // Attempt to inject the input into the currently running task.
        if let Err(items) = sess.inject_input(items).await {
            if let Some(env_item) =
                sess.build_environment_update_item(previous_context.as_ref(), &current_context)
            {
                sess.record_conversation_items(&current_context, std::slice::from_ref(&env_item))
                    .await;
            }
            sess.spawn_task(Arc::clone(&current_context), items, RegularTask)
                .await;
            *previous_context = Some(current_context);
        }
    }
    pub async fn run_user_shell_command(
        sess: &Arc<Session>,
        sub_id: String,
        command: String,
        previous_context: &mut Option<Arc<TurnContext>>,
    ) {
        let turn_context = sess
            .new_turn_with_sub_id(sub_id, SessionSettingsUpdate::default())
            .await;
        sess.spawn_task(
            Arc::clone(&turn_context),
            Vec::new(),
            UserShellCommandTask::new(command),
        )
        .await;
        *previous_context = Some(turn_context);
    }
    pub async fn exec_approval(sess: &Arc<Session>, id: String, decision: ReviewDecision) {
        match decision {
            ReviewDecision::Abort => {
                sess.interrupt_task().await;
            }
            other => sess.notify_approval(&id, other).await,
        }
    }

    pub async fn patch_approval(sess: &Arc<Session>, id: String, decision: ReviewDecision) {
        match decision {
            ReviewDecision::Abort => {
                sess.interrupt_task().await;
            }
            other => sess.notify_approval(&id, other).await,
        }
    }
    pub async fn add_to_history(sess: &Arc<Session>, config: &Arc<Config>, text: String) {
        let id = sess.conversation_id;
        let config = Arc::clone(config);
        tokio::spawn(async move {
            if let Err(e) = crate::message_history::append_entry(&text, &id, &config).await {
                warn!("failed to append to message history: {e}");
            }
        });
    }
    pub async fn get_history_entry_request(
        sess: &Arc<Session>,
        config: &Arc<Config>,
        sub_id: String,
        offset: usize,
        log_id: u64,
    ) {
        let config = Arc::clone(config);
        let sess_clone = Arc::clone(sess);
        tokio::spawn(async move {
            // Run the lookup on a blocking thread because it does file I/O and locking.
            let entry_opt = tokio::task::spawn_blocking(move || {
                crate::message_history::lookup(log_id, offset, &config)
            })
            .await
            .unwrap_or(None);
            let event = Event {
                id: sub_id,
                msg: EventMsg::GetHistoryEntryResponse(
                    crate::protocol::GetHistoryEntryResponseEvent {
                        offset,
                        log_id,
                        entry: entry_opt.map(|e| codex_protocol::message_history::HistoryEntry {
                            conversation_id: e.session_id,
                            ts: e.ts,
                            text: e.text,
                        }),
                    },
                ),
            };
            sess_clone.send_event_raw(event).await;
        });
    }
    pub async fn list_mcp_tools(sess: &Session, config: &Arc<Config>, sub_id: String) {
        // This is a cheap lookup from the connection manager's cache.
        let tools = sess.services.mcp_connection_manager.list_all_tools();
        let (auth_status_entries, resources, resource_templates) = tokio::join!(
            compute_auth_statuses(
                config.mcp_servers.iter(),
                config.mcp_oauth_credentials_store_mode,
            ),
            sess.services.mcp_connection_manager.list_all_resources(),
            sess.services
                .mcp_connection_manager
                .list_all_resource_templates()
        );
        let auth_statuses = auth_status_entries
            .iter()
            .map(|(name, entry)| (name.clone(), entry.auth_status))
            .collect();
        let event = Event {
            id: sub_id,
            msg: EventMsg::McpListToolsResponse(crate::protocol::McpListToolsResponseEvent {
                tools,
                resources,
                resource_templates,
                auth_statuses,
            }),
        };
        sess.send_event_raw(event).await;
    }
    pub async fn list_custom_prompts(sess: &Session, sub_id: String) {
        let custom_prompts: Vec<CustomPrompt> =
            if let Some(dir) = crate::custom_prompts::default_prompts_dir() {
                crate::custom_prompts::discover_prompts_in(&dir).await
            } else {
                Vec::new()
            };
        let event = Event {
            id: sub_id,
            msg: EventMsg::ListCustomPromptsResponse(ListCustomPromptsResponseEvent {
                custom_prompts,
            }),
        };
        sess.send_event_raw(event).await;
    }
    pub async fn undo(sess: &Arc<Session>, sub_id: String) {
        let turn_context = sess
            .new_turn_with_sub_id(sub_id, SessionSettingsUpdate::default())
            .await;
        sess.spawn_task(turn_context, Vec::new(), UndoTask::new())
            .await;
    }

    pub async fn compact(sess: &Arc<Session>, sub_id: String) {
        let turn_context = sess
            .new_turn_with_sub_id(sub_id, SessionSettingsUpdate::default())
            .await;
        // Attempt to inject the summarization prompt into the current task.
        if let Err(items) = sess
            .inject_input(vec![UserInput::Text {
                text: compact::SUMMARIZATION_PROMPT.to_string(),
            }])
            .await
        {
            sess.spawn_task(Arc::clone(&turn_context), items, CompactTask)
                .await;
        }
    }
    pub async fn shutdown(sess: &Arc<Session>, sub_id: String) -> bool {
        sess.abort_all_tasks(TurnAbortReason::Interrupted).await;
        info!("Shutting down Codex instance");
        // Gracefully flush and shut down the rollout recorder on session end so
        // tests that inspect the rollout file do not race with the background writer.
        let recorder_opt = {
            let mut guard = sess.services.rollout.lock().await;
            guard.take()
        };
        if let Some(rec) = recorder_opt
            && let Err(e) = rec.shutdown().await
        {
            warn!("failed to shutdown rollout recorder: {e}");
            let event = Event {
                id: sub_id.clone(),
                msg: EventMsg::Error(ErrorEvent {
                    message: "Failed to shutdown rollout recorder".to_string(),
                }),
            };
            sess.send_event_raw(event).await;
        }
        let event = Event {
            id: sub_id,
            msg: EventMsg::ShutdownComplete,
        };
        sess.send_event_raw(event).await;
        true
    }
    pub async fn review(
        sess: &Arc<Session>,
        config: &Arc<Config>,
        sub_id: String,
        review_request: ReviewRequest,
    ) {
        let turn_context = sess
            .new_turn_with_sub_id(sub_id.clone(), SessionSettingsUpdate::default())
            .await;
        spawn_review_thread(
            Arc::clone(sess),
            Arc::clone(config),
            turn_context.clone(),
            sub_id,
            review_request,
        )
        .await;
    }
}
/// Spawn a review thread using the given prompt.
async fn spawn_review_thread(
    sess: Arc<Session>,
    config: Arc<Config>,
    parent_turn_context: Arc<TurnContext>,
    sub_id: String,
    review_request: ReviewRequest,
) {
    let model = config.review_model.clone();
    let review_model_family = find_family_for_model(&model)
        .unwrap_or_else(|| parent_turn_context.client.get_model_family());
    // For reviews, disable web_search, view_image, and the streamable shell
    // regardless of global settings.
    let mut review_features = config.features.clone();
    review_features.disable(crate::features::Feature::WebSearchRequest);
    review_features.disable(crate::features::Feature::ViewImageTool);
    review_features.disable(crate::features::Feature::StreamableShell);
    let tools_config = ToolsConfig::new(&ToolsConfigParams {
        model_family: &review_model_family,
        features: &review_features,
    });
    let base_instructions = REVIEW_PROMPT.to_string();
    let review_prompt = review_request.prompt.clone();
    let provider = parent_turn_context.client.get_provider();
    let auth_manager = parent_turn_context.client.get_auth_manager();
    let model_family = review_model_family.clone();
    // Build a per-turn client with the requested model/family.
    let mut per_turn_config = (*config).clone();
    per_turn_config.model = model.clone();
    per_turn_config.model_family = model_family.clone();
    per_turn_config.model_reasoning_effort = Some(ReasoningEffortConfig::Low);
    per_turn_config.model_reasoning_summary = ReasoningSummaryConfig::Detailed;
    if let Some(model_info) = get_model_info(&model_family) {
        per_turn_config.model_context_window = Some(model_info.context_window);
    }
    let otel_event_manager = parent_turn_context
        .client
        .get_otel_event_manager()
        .with_model(
            per_turn_config.model.as_str(),
            per_turn_config.model_family.slug.as_str(),
        );
    let per_turn_config = Arc::new(per_turn_config);
    let client = ModelClient::new(
        per_turn_config.clone(),
        auth_manager,
        otel_event_manager,
Review Mode (Core) (#3401)
## 📝 Review Mode -- Core
This PR introduces the Core implementation for Review mode:
- New op `Op::Review { prompt: String }:` spawns a child review task
with isolated context, a review‑specific system prompt, and a
`Config.review_model`.
- `EnteredReviewMode`: emitted when the child review session starts.
Every event from this point onwards reflects the review session.
- `ExitedReviewMode(Option<ReviewOutputEvent>)`: emitted when the review
finishes or is interrupted, with optional structured findings:
```json
{
  "findings": [
    {
      "title": "<≤ 80 chars, imperative>",
      "body": "<valid Markdown explaining *why* this is a problem; cite files/lines/functions>",
      "confidence_score": <float 0.0-1.0>,
      "priority": <int 0-3>,
      "code_location": {
        "absolute_file_path": "<file path>",
        "line_range": {"start": <int>, "end": <int>}
      }
    }
  ],
  "overall_correctness": "patch is correct" | "patch is incorrect",
  "overall_explanation": "<1-3 sentence explanation justifying the overall_correctness verdict>",
  "overall_confidence_score": <float 0.0-1.0>
}
```
## Questions
### Why separate out its own message history?
We want the review thread to match the training of our review models as
much as possible -- that means using a custom prompt, removing user
instructions, and starting a clean chat history.
We also want to make sure the review thread doesn't leak into the parent
thread.
### Why do this as a mode, vs. sub-agents?
1. We want review to be a synchronous task, so it's fine for now to do a
bespoke implementation.
2. We're still unclear about the final structure for sub-agents. We'd
prefer to land this quickly and then refactor into sub-agents without
rushing that implementation.
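For illustration, the findings payload maps naturally onto serde types along these lines (a sketch; the actual definitions in `codex-rs` may differ):
```rust
use serde::{Deserialize, Serialize};

#[derive(Debug, Serialize, Deserialize)]
pub struct ReviewOutputEvent {
    pub findings: Vec<ReviewFinding>,
    pub overall_correctness: String, // "patch is correct" | "patch is incorrect"
    pub overall_explanation: String,
    pub overall_confidence_score: f32,
}

#[derive(Debug, Serialize, Deserialize)]
pub struct ReviewFinding {
    pub title: String,         // <= 80 chars, imperative
    pub body: String,          // Markdown explaining *why*, citing files/lines/functions
    pub confidence_score: f32, // 0.0-1.0
    pub priority: i32,         // 0-3
    pub code_location: CodeLocation,
}

#[derive(Debug, Serialize, Deserialize)]
pub struct CodeLocation {
    pub absolute_file_path: String,
    pub line_range: LineRange,
}

#[derive(Debug, Serialize, Deserialize)]
pub struct LineRange {
    pub start: i32,
    pub end: i32,
}
```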
        provider,
        per_turn_config.model_reasoning_effort,
        per_turn_config.model_reasoning_summary,
        sess.conversation_id,
    );
    let review_turn_context = TurnContext {
        sub_id: sub_id.to_string(),
        client,
        tools_config,
        user_instructions: None,
        base_instructions: Some(base_instructions.clone()),
        approval_policy: parent_turn_context.approval_policy,
        sandbox_policy: parent_turn_context.sandbox_policy.clone(),
        shell_environment_policy: parent_turn_context.shell_environment_policy.clone(),
        cwd: parent_turn_context.cwd.clone(),
        is_review_mode: true,
        final_output_json_schema: None,
        codex_linux_sandbox_exe: parent_turn_context.codex_linux_sandbox_exe.clone(),
        tool_call_gate: Arc::new(ReadinessFlag::new()),
    };
    // Seed the child task with the review prompt as the initial user message.
    let input: Vec<UserInput> = vec![UserInput::Text {
        text: review_prompt,
    }];
    let tc = Arc::new(review_turn_context);
    sess.spawn_task(tc.clone(), input, ReviewTask).await;
    // Announce entering review mode so UIs can switch modes.
    sess.send_event(&tc, EventMsg::EnteredReviewMode(review_request))
        .await;
}
fix: chat completions API now also passes tools along (#1167)
Prior to this PR, there were two big misses in `chat_completions.rs`:
1. The loop in `stream_chat_completions()` was only including items of
type `ResponseItem::Message` when building up the `"messages"` JSON for
the `POST` request to the `chat/completions` endpoint. This fixes things
by ensuring other variants (`FunctionCall`, `LocalShellCall`, and
`FunctionCallOutput`) are included, as well.
2. In `process_chat_sse()`, we were not recording tool calls and were
only emitting items of type
`ResponseEvent::OutputItemDone(ResponseItem::Message)` to the stream.
Now we introduce `FunctionCallState`, which is used to accumulate the
`delta`s of type `tool_calls`, so we can ultimately emit a
`ResponseItem::FunctionCall`, when appropriate.
While function calling now appears to work for chat completions with my
local testing, I believe that there are still edge cases that are not
covered and that this codepath would benefit from a battery of
integration tests. (As part of that further cleanup, we should also work
to support streaming responses in the UI.)
The other important part of this PR is some cleanup in
`core/src/codex.rs`. In particular, it was hard to reason about how
`run_task()` was building up the list of messages to include in a
request across the various cases:
- Responses API
- Chat Completions API
- Responses API used in concert with ZDR
I like to think things are a bit cleaner now where:
- `zdr_transcript` (if present) contains all messages in the history of
the conversation, which includes function call outputs that have not
been sent back to the model yet
- `pending_input` includes any messages the user has submitted while the
turn is in flight that need to be injected as part of the next `POST` to
the model
- `input_for_next_turn` includes the tool call outputs that have not
been sent back to the model yet
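The delta accumulation described in point 2 boils down to something like the following sketch (field names assumed, not the actual `FunctionCallState` in `chat_completions.rs`):
```rust
/// Accumulates streamed `tool_calls` deltas until the function call is complete.
#[derive(Default, Debug)]
struct FunctionCallState {
    name: Option<String>,
    call_id: Option<String>,
    arguments: String, // JSON argument fragments arrive incrementally
}

impl FunctionCallState {
    fn apply_delta(&mut self, name: Option<&str>, call_id: Option<&str>, args: Option<&str>) {
        if let Some(n) = name {
            self.name.get_or_insert_with(|| n.to_string());
        }
        if let Some(id) = call_id {
            self.call_id.get_or_insert_with(|| id.to_string());
        }
        if let Some(a) = args {
            self.arguments.push_str(a); // concatenate partial JSON
        }
    }
}

fn main() {
    let mut state = FunctionCallState::default();
    state.apply_delta(Some("shell"), Some("call_1"), Some(r#"{"cmd":"#));
    state.apply_delta(None, None, Some(r#""ls"}"#));
    assert_eq!(state.arguments, r#"{"cmd":"ls"}"#);
    println!("{state:?}");
}
```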
/// Takes a user message as input and runs a loop where, at each turn, the model
/// replies with either:
///
/// - requested function calls
/// - an assistant message
///
/// While it is possible for the model to return multiple of these items in a
/// single turn, in practice, we generally see one item per turn:
///
/// - If the model requests a function call, we execute it and send the output
/// back to the model in the next turn.
/// - If the model sends only an assistant message, we record it in the
/// conversation history and consider the task complete.
///
/// Review mode: when `turn_context.is_review_mode` is true, the turn runs in an
/// isolated in-memory thread without the parent session's prior history or
/// user_instructions. Emits ExitedReviewMode upon final review message.
pub(crate) async fn run_task(
    sess: Arc<Session>,
    turn_context: Arc<TurnContext>,
    input: Vec<UserInput>,
    task_kind: TaskKind,
    cancellation_token: CancellationToken,
) -> Option<String> {
    if input.is_empty() {
        return None;
    }
    let event = EventMsg::TaskStarted(TaskStartedEvent {
        model_context_window: turn_context.client.get_model_context_window(),
    });
    sess.send_event(&turn_context, event).await;
    let initial_input_for_turn: ResponseInputItem = ResponseInputItem::from(input);
    // For review threads, keep an isolated in-memory history so the
    // model sees a fresh conversation without the parent session's history.
    // For normal turns, continue recording to the session history as before.
    let is_review_mode = turn_context.is_review_mode;
    let mut review_thread_history: ConversationHistory = ConversationHistory::new();
    if is_review_mode {
        // Seed review threads with environment context so the model knows the working directory.
        review_thread_history
            .record_items(sess.build_initial_context(turn_context.as_ref()).iter());
        review_thread_history.record_items(std::iter::once(&initial_input_for_turn.into()));
    } else {
        sess.record_input_and_rollout_usermsg(turn_context.as_ref(), &initial_input_for_turn)
            .await;
    }
    sess.maybe_start_ghost_snapshot(Arc::clone(&turn_context), cancellation_token.child_token())
        .await;
    let mut last_agent_message: Option<String> = None;
    // Although from the perspective of codex.rs, TurnDiffTracker has the lifecycle of a Task
    // which contains many turns, from the perspective of the user, it is a single turn.
    let turn_diff_tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
    let mut auto_compact_recently_attempted = false;
    loop {
        // Note that pending_input would be something like a message the user
        // submitted through the UI while the model was running. Though the UI
        // may support this, the model might not.
        let pending_input = sess
            .get_pending_input()
            .await
            .into_iter()
            .map(ResponseItem::from)
            .collect::<Vec<ResponseItem>>();
        // Construct the input that we will send to the model.
        //
        // - For review threads, use the isolated in-memory history so the
        //   model sees a fresh conversation (no parent history/user_instructions).
        //
        // - For normal turns, use the session's full history. When using the
        //   chat completions API (or ZDR clients), the model needs the full
        //   conversation history on each turn. The rollout file, however, should
        //   only record the new items that originated in this turn so that it
        //   represents an append-only log without duplicates.
        let turn_input: Vec<ResponseItem> = if is_review_mode {
            if !pending_input.is_empty() {
                review_thread_history.record_items(&pending_input);
            }
            review_thread_history.get_history_for_prompt()
        } else {
            sess.record_conversation_items(&turn_context, &pending_input)
                .await;
            sess.clone_history().await.get_history_for_prompt()
        };
feat: configurable notifications in the Rust CLI (#793)
With this change, you can specify a program that will be executed to get
notified about events generated by Codex. The notification info will be
packaged as a JSON object. The supported notification types are defined
by the `UserNotification` enum introduced in this PR. Initially, it
contains only one variant, `AgentTurnComplete`:
```rust
pub(crate) enum UserNotification {
    #[serde(rename_all = "kebab-case")]
    AgentTurnComplete {
        turn_id: String,
        /// Messages that the user sent to the agent to initiate the turn.
        input_messages: Vec<String>,
        /// The last message sent by the assistant in the turn.
        last_assistant_message: Option<String>,
    },
}
```
This is intended to support the common case when a "turn" ends, which
often means it is now your chance to give Codex further instructions.
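Given the `kebab-case` rename above, the JSON handed to the notify program should look roughly like this (a sketch with invented values; the exact key casing depends on the serde attributes, and the script below reads the underscore form `input_messages`):
```json
{
  "type": "agent-turn-complete",
  "turn-id": "1f2e3d4c",
  "input-messages": ["rename the helper and fix the tests"],
  "last-assistant-message": "Renamed to build_context and updated both tests."
}
```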
For example, I have the following in my `~/.codex/config.toml`:
```toml
notify = ["python3", "/Users/mbolin/.codex/notify.py"]
```
I created my own custom notifier script that calls out to
[terminal-notifier](https://github.com/julienXX/terminal-notifier) to
show a desktop push notification on macOS. Contents of `notify.py`:
```python
#!/usr/bin/env python3
import json
import subprocess
import sys


def main() -> int:
    if len(sys.argv) != 2:
        print("Usage: notify.py <NOTIFICATION_JSON>")
        return 1

    try:
        notification = json.loads(sys.argv[1])
    except json.JSONDecodeError:
        return 1

    match notification_type := notification.get("type"):
        case "agent-turn-complete":
            assistant_message = notification.get("last-assistant-message")
            if assistant_message:
                title = f"Codex: {assistant_message}"
            else:
                title = "Codex: Turn Complete!"
            input_messages = notification.get("input_messages", [])
            message = " ".join(input_messages)
            title += message
        case _:
            print(f"not sending a push notification for: {notification_type}")
            return 0

    subprocess.check_output(
        [
            "terminal-notifier",
            "-title",
            title,
            "-message",
            message,
            "-group",
            "codex",
            "-ignoreDnD",
            "-activate",
            "com.googlecode.iterm2",
        ]
    )
    return 0


if __name__ == "__main__":
    sys.exit(main())
```
For reference, here are related PRs that tried to add this functionality
to the TypeScript version of the Codex CLI:
* https://github.com/openai/codex/pull/160
* https://github.com/openai/codex/pull/498
        let turn_input_messages: Vec<String> = turn_input
            .iter()
            .filter_map(|item| match item {
                ResponseItem::Message { content, .. } => Some(content),
                _ => None,
            })
            .flat_map(|content| {
                content.iter().filter_map(|item| match item {
                    ContentItem::OutputText { text } => Some(text.clone()),
                    _ => None,
                })
            })
            .collect();
        match run_turn(
            Arc::clone(&sess),
            Arc::clone(&turn_context),
            Arc::clone(&turn_diff_tracker),
            turn_input,
            task_kind,
            cancellation_token.child_token(),
        )
        .await
        {
            Ok(turn_output) => {
                let TurnRunResult {
                    processed_items,
                    total_token_usage,
                } = turn_output;
                let limit = turn_context
                    .client
                    .get_auto_compact_token_limit()
                    .unwrap_or(i64::MAX);
                let total_usage_tokens = total_token_usage
                    .as_ref()
                    .map(TokenUsage::tokens_in_context_window);
                let token_limit_reached = total_usage_tokens
                    .map(|tokens| tokens >= limit)
                    .unwrap_or(false);
                let (responses, items_to_record_in_conversation_history) = process_items(
                    processed_items,
                    is_review_mode,
                    &mut review_thread_history,
                    &sess,
                    &turn_context,
                )
                .await;
                if token_limit_reached {
                    if auto_compact_recently_attempted {
                        let limit_str = limit.to_string();
                        let current_tokens = total_usage_tokens
                            .map(|tokens| tokens.to_string())
                            .unwrap_or_else(|| "unknown".to_string());
                        let event = EventMsg::Error(ErrorEvent {
                            message: format!(
                                "Conversation is still above the token limit after automatic summarization (limit {limit_str}, current {current_tokens}). Please start a new session or trim your input."
                            ),
                        });
                        sess.send_event(&turn_context, event).await;
                        break;
                    }
                    auto_compact_recently_attempted = true;
                    compact::run_inline_auto_compact_task(sess.clone(), turn_context.clone()).await;
                    continue;
                }
                auto_compact_recently_attempted = false;
                if responses.is_empty() {
fix: chat completions API now also passes tools along (#1167)
Prior to this PR, there were two big misses in `chat_completions.rs`:
1. The loop in `stream_chat_completions()` was only including items of
type `ResponseItem::Message` when building up the `"messages"` JSON for
the `POST` request to the `chat/completions` endpoint. This fixes things
by ensuring other variants (`FunctionCall`, `LocalShellCall`, and
`FunctionCallOutput`) are included, as well.
2. In `process_chat_sse()`, we were not recording tool calls and were
only emitting items of type
`ResponseEvent::OutputItemDone(ResponseItem::Message)` to the stream.
Now we introduce `FunctionCallState`, which is used to accumulate the
`delta`s of type `tool_calls`, so we can ultimately emit a
`ResponseItem::FunctionCall`, when appropriate.
While function calling now appears to work for chat completions with my
local testing, I believe that there are still edge cases that are not
covered and that this codepath would benefit from a battery of
integration tests. (As part of that further cleanup, we should also work
to support streaming responses in the UI.)
The other important part of this PR is some cleanup in
`core/src/codex.rs`. In particular, it was hard to reason about how
`run_task()` was building up the list of messages to include in a
request across the various cases:
- Responses API
- Chat Completions API
- Responses API used in concert with ZDR
I like to think things are a bit cleaner now where:
- `zdr_transcript` (if present) contains all messages in the history of
the conversation, which includes function call outputs that have not
been sent back to the model yet
- `pending_input` includes any messages the user has submitted while the
turn is in flight that need to be injected as part of the next `POST` to
the model
- `input_for_next_turn` includes the tool call outputs that have not
been sent back to the model yet
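A minimal sketch of the delta-accumulation idea (hedged: field and method names here are illustrative, not the exact `FunctionCallState` added in this PR):
```rust
/// Accumulates streamed `tool_calls` deltas until the call is complete.
#[derive(Default)]
struct FunctionCallState {
    name: Option<String>,
    call_id: Option<String>,
    /// JSON argument fragments are appended as they stream in.
    arguments: String,
}

impl FunctionCallState {
    fn apply_delta(&mut self, name: Option<&str>, id: Option<&str>, args_fragment: &str) {
        if let Some(name) = name {
            self.name.get_or_insert_with(|| name.to_string());
        }
        if let Some(id) = id {
            self.call_id.get_or_insert_with(|| id.to_string());
        }
        self.arguments.push_str(args_fragment);
    }
}
```
Once the stream signals the call is finished, the accumulated state is enough to emit a `ResponseItem::FunctionCall`.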
2025-06-02 13:47:51 -07:00
                    last_agent_message = get_last_assistant_message_from_turn(
                        &items_to_record_in_conversation_history,
                    );
                    sess.notifier()
                        .notify(&UserNotification::AgentTurnComplete {
                            thread_id: sess.conversation_id.to_string(),
                            turn_id: turn_context.sub_id.clone(),
                            cwd: turn_context.cwd.display().to_string(),
                            input_messages: turn_input_messages,
                            last_assistant_message: last_agent_message.clone(),
                        });
                    break;
                }
                continue;
            }
            Err(CodexErr::TurnAborted {
                dangling_artifacts: processed_items,
            }) => {
                let _ = process_items(
                    processed_items,
                    is_review_mode,
                    &mut review_thread_history,
                    &sess,
                    &turn_context,
                )
                .await;
                // Aborted turn is reported via a different event.
                break;
            }
            Err(e) => {
                info!("Turn error: {e:#}");
                let event = EventMsg::Error(ErrorEvent {
                    message: e.to_string(),
                });
                sess.send_event(&turn_context, event).await;
                // Let the user continue the conversation.
                break;
            }
        }
    }
Review Mode (Core) (#3401)
## 📝 Review Mode -- Core
This PR introduces the Core implementation for Review mode:
- New op `Op::Review { prompt: String }`: spawns a child review task
with isolated context, a review‑specific system prompt, and a
`Config.review_model`.
- `EnteredReviewMode`: emitted when the child review session starts.
Every event from this point onwards reflects the review session.
- `ExitedReviewMode(Option<ReviewOutputEvent>)`: emitted when the review
finishes or is interrupted, with optional structured findings:
```json
{
"findings": [
{
"title": "<≤ 80 chars, imperative>",
"body": "<valid Markdown explaining *why* this is a problem; cite files/lines/functions>",
"confidence_score": <float 0.0-1.0>,
"priority": <int 0-3>,
"code_location": {
"absolute_file_path": "<file path>",
"line_range": {"start": <int>, "end": <int>}
}
}
],
"overall_correctness": "patch is correct" | "patch is incorrect",
"overall_explanation": "<1-3 sentence explanation justifying the overall_correctness verdict>",
"overall_confidence_score": <float 0.0-1.0>
}
```
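A hedged sketch of how this payload could deserialize with `serde` (field names mirror the JSON above; the real `ReviewOutputEvent` definition may differ):
```rust
use serde::Deserialize;

#[derive(Debug, Default, Deserialize)]
struct LineRange {
    start: u32,
    end: u32,
}

#[derive(Debug, Deserialize)]
struct CodeLocation {
    absolute_file_path: String,
    line_range: LineRange,
}

#[derive(Debug, Deserialize)]
struct ReviewFinding {
    title: String,
    body: String,
    confidence_score: f32,
    priority: u8,
    code_location: CodeLocation,
}

#[derive(Debug, Default, Deserialize)]
struct ReviewOutputEvent {
    findings: Vec<ReviewFinding>,
    overall_correctness: String,
    overall_explanation: String,
    overall_confidence_score: f32,
}
```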
## Questions
### Why separate out its own message history?
We want the review thread to match the training of our review models as
much as possible -- that means using a custom prompt, removing user
instructions, and starting a clean chat history.
We also want to make sure the review thread doesn't leak into the parent
thread.
### Why do this as a mode, vs. sub-agents?
1. We want review to be a synchronous task, so it's fine for now to do a
bespoke implementation.
2. We're still unclear about the final structure for sub-agents. We'd
prefer to land this quickly and then refactor into sub-agents without
rushing that implementation.
2025-09-12 16:25:10 -07:00
    // If this was a review thread and we have a final assistant message, try to
    // parse it as a ReviewOutput.
    //
    // If parsing fails, construct a minimal ReviewOutputEvent using the plain
    // text as the overall explanation. Otherwise, just exit review mode with None.
    //
    // Emits an ExitedReviewMode event with the parsed review output.
    if turn_context.is_review_mode {
        exit_review_mode(
            sess.clone(),
            Arc::clone(&turn_context),
            last_agent_message.as_deref().map(parse_review_output_event),
        )
        .await;
    }

    last_agent_message
}
/// Parse the review output; when it is not valid JSON, build a structured
/// fallback that carries the plain text as the overall explanation.
///
/// Returns a `ReviewOutputEvent` parsed from JSON, or a fallback populated from the text.
fn parse_review_output_event(text: &str) -> ReviewOutputEvent {
    // Try a direct parse first.
    if let Ok(ev) = serde_json::from_str::<ReviewOutputEvent>(text) {
        return ev;
    }
    // If wrapped in markdown fences or extra prose, attempt to extract the first JSON object.
    if let (Some(start), Some(end)) = (text.find('{'), text.rfind('}'))
        && start < end
        && let Some(slice) = text.get(start..=end)
        && let Ok(ev) = serde_json::from_str::<ReviewOutputEvent>(slice)
    {
        return ev;
    }
    // Not JSON: return a structured ReviewOutputEvent that carries
    // the plain text as the overall explanation.
    ReviewOutputEvent {
        overall_explanation: text.to_string(),
        ..Default::default()
    }
}
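// A hedged illustration of `parse_review_output_event`'s fallback path
// (values hypothetical; assumes `findings` defaults to an empty list):
//
//     let ev = parse_review_output_event("LGTM overall, one nit.");
//     assert_eq!(ev.overall_explanation, "LGTM overall, one nit.");
//     assert!(ev.findings.is_empty());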
async fn run_turn(
    sess: Arc<Session>,
    turn_context: Arc<TurnContext>,
    turn_diff_tracker: SharedTurnDiffTracker,
    input: Vec<ResponseItem>,
    task_kind: TaskKind,
    cancellation_token: CancellationToken,
) -> CodexResult<TurnRunResult> {
chore: refactor tool handling (#4510)
# Tool System Refactor
- Centralizes tool definitions and execution in `core/src/tools/*`:
specs (`spec.rs`), handlers (`handlers/*`), router (`router.rs`),
registry/dispatch (`registry.rs`), and shared context (`context.rs`).
One registry now builds the model-visible tool list and binds handlers.
- Router converts model responses to tool calls; Registry dispatches
with consistent telemetry via `codex-rs/otel` and unified error
handling. Function, Local Shell, MCP, and experimental `unified_exec`
all flow through this path; legacy shell aliases still work.
- Rationale: reduce per‑tool boilerplate, keep spec/handler in sync, and
make adding tools predictable and testable.
Example: `read_file`
- Spec: `core/src/tools/spec.rs` (see `create_read_file_tool`,
registered by `build_specs`).
- Handler: `core/src/tools/handlers/read_file.rs` (absolute `file_path`,
1‑indexed `offset`, `limit`, `L#: ` prefixes, safe truncation).
- E2E test: `core/tests/suite/read_file.rs` validates the tool returns
the requested lines.
## Next steps:
- Decompose `handle_container_exec_with_params`
- Add parallel tool calls
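A rough sketch of the spec/handler pairing this refactor centralizes (illustrative only; the real `ToolSpec`/handler traits live under `core/src/tools/*` and differ in detail):
```rust
// Hypothetical simplification of the registry pattern described above.
struct ToolSpec {
    name: &'static str,
    description: &'static str,
}

trait ToolHandler {
    fn spec(&self) -> ToolSpec;
    fn handle(&self, arguments: &str) -> Result<String, String>;
}

struct ReadFileHandler;

impl ToolHandler for ReadFileHandler {
    fn spec(&self) -> ToolSpec {
        ToolSpec {
            name: "read_file",
            description: "Read a 1-indexed line range from an absolute file path",
        }
    }

    fn handle(&self, arguments: &str) -> Result<String, String> {
        // Parse `arguments`, read the file, prefix lines with `L#: `, truncate safely.
        Err(format!("sketch only; got: {arguments}"))
    }
}
```
Registering each handler alongside its spec keeps the model-visible tool list and the dispatch table in sync by construction.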
2025-10-03 13:21:06 +01:00
    let mcp_tools = sess.services.mcp_connection_manager.list_all_tools();
    let router = Arc::new(ToolRouter::from_config(
        &turn_context.tools_config,
        Some(mcp_tools),
    ));

    let model_supports_parallel = turn_context
        .client
        .get_model_family()
        .supports_parallel_tool_calls;
    let parallel_tool_calls = model_supports_parallel;
    let prompt = Prompt {
        input,
        tools: router.specs(),
        parallel_tool_calls,
        base_instructions_override: turn_context.base_instructions.clone(),
        output_schema: turn_context.final_output_json_schema.clone(),
    };
    let mut retries = 0;
    loop {
        match try_run_turn(
            Arc::clone(&router),
            Arc::clone(&sess),
            Arc::clone(&turn_context),
            Arc::clone(&turn_diff_tracker),
            &prompt,
            task_kind,
            cancellation_token.child_token(),
        )
        .await
        {
            Ok(output) => return Ok(output),
            Err(CodexErr::TurnAborted {
                dangling_artifacts: processed_items,
            }) => {
                return Err(CodexErr::TurnAborted {
                    dangling_artifacts: processed_items,
                });
            }
            Err(CodexErr::Interrupted) => return Err(CodexErr::Interrupted),
            Err(CodexErr::EnvVar(var)) => return Err(CodexErr::EnvVar(var)),
            Err(e @ CodexErr::Fatal(_)) => return Err(e),
            Err(e @ CodexErr::ContextWindowExceeded) => {
                sess.set_total_tokens_full(turn_context.as_ref()).await;
                return Err(e);
            }
            Err(CodexErr::UsageLimitReached(e)) => {
                let rate_limits = e.rate_limits.clone();
                if let Some(rate_limits) = rate_limits {
                    sess.update_rate_limits(turn_context.as_ref(), rate_limits)
                        .await;
                }
                return Err(CodexErr::UsageLimitReached(e));
            }
            Err(CodexErr::UsageNotIncluded) => return Err(CodexErr::UsageNotIncluded),
            Err(e) => {
                // Use the configured provider-specific stream retry budget.
                let max_retries = turn_context.client.get_provider().stream_max_retries();
                if retries < max_retries {
                    retries += 1;
                    let delay = match e {
                        CodexErr::Stream(_, Some(delay)) => delay,
                        _ => backoff(retries),
                    };
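                    // Hedged note: `backoff(retries)` is assumed to implement an
                    // exponential policy (roughly base_delay * 2^retries, capped,
                    // often with jitter); a server-provided delay takes precedence above.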
                    warn!(
                        "stream disconnected - retrying turn ({retries}/{max_retries} in {delay:?})...",
                    );
                    // Surface retry information to any UI/front-end so the
                    // user understands what is happening instead of staring
                    // at a seemingly frozen screen.
                    sess.notify_stream_error(
                        turn_context.as_ref(),
                        format!("Reconnecting... {retries}/{max_retries}"),
                    )
                    .await;
                    tokio::time::sleep(delay).await;
                } else {
                    return Err(e);
                }
            }
        }
    }
}
/// When the model is prompted, it returns a stream of events. Some of these
/// events map to a `ResponseItem`. A `ResponseItem` may need to be
/// "handled" such that it produces a `ResponseInputItem` that needs to be
/// sent back to the model on the next turn.
#[derive(Debug)]
pub struct ProcessedResponseItem {
    pub item: ResponseItem,
    pub response: Option<ResponseInputItem>,
}

#[derive(Debug)]
struct TurnRunResult {
    processed_items: Vec<ProcessedResponseItem>,
    total_token_usage: Option<TokenUsage>,
}
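// For illustration (hedged): a plain assistant message becomes
// `ProcessedResponseItem { item, response: None }`, while a function call pairs
// the call item with the `ResponseInputItem::FunctionCallOutput` that must be
// sent back to the model on the next turn.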
#[allow(clippy::too_many_arguments)]
async fn try_run_turn(
    router: Arc<ToolRouter>,
    sess: Arc<Session>,
    turn_context: Arc<TurnContext>,
    turn_diff_tracker: SharedTurnDiffTracker,
    prompt: &Prompt,
    task_kind: TaskKind,
    cancellation_token: CancellationToken,
) -> CodexResult<TurnRunResult> {
    let rollout_item = RolloutItem::TurnContext(TurnContextItem {
        cwd: turn_context.cwd.clone(),
        approval_policy: turn_context.approval_policy,
        sandbox_policy: turn_context.sandbox_policy.clone(),
        model: turn_context.client.get_model(),
        effort: turn_context.client.get_reasoning_effort(),
        summary: turn_context.client.get_reasoning_summary(),
    });

    sess.persist_rollout_items(&[rollout_item]).await;

    let mut stream = turn_context
        .client
        .clone()
        .stream_with_task_kind(prompt, task_kind)
        .or_cancel(&cancellation_token)
        .await??;
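    // The double `?` first propagates cancellation from `or_cancel`, then any
    // error from opening the model stream itself.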
    let tool_runtime = ToolCallRuntime::new(
        Arc::clone(&router),
        Arc::clone(&sess),
        Arc::clone(&turn_context),
        Arc::clone(&turn_diff_tracker),
    );
    let mut output: FuturesOrdered<BoxFuture<CodexResult<ProcessedResponseItem>>> =
        FuturesOrdered::new();
    loop {
        // Poll the next item from the model stream. We must inspect *both* Ok and Err
        // cases so that transient stream failures (e.g., a dropped SSE connection before
        // `response.completed`) bubble up and trigger the caller's retry logic.
        let event = match stream.next().or_cancel(&cancellation_token).await {
            Ok(event) => event,
            Err(codex_async_utils::CancelErr::Cancelled) => {
                let processed_items = output.try_collect().await?;
                return Err(CodexErr::TurnAborted {
                    dangling_artifacts: processed_items,
                });
            }
        };

        let event = match event {
            Some(res) => res?,
            None => {
                return Err(CodexErr::Stream(
                    "stream closed before response.completed".into(),
                    None,
                ));
            }
        };

        let add_completed = &mut |response_item: ProcessedResponseItem| {
            output.push_back(future::ready(Ok(response_item)).boxed());
        };
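        // `FuturesOrdered` yields completed futures in the order they were pushed,
        // so tool outputs are recorded in the order the model emitted the calls
        // even when they execute concurrently.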
        match event {
            ResponseEvent::Created => {}
            ResponseEvent::OutputItemDone(item) => {
                match ToolRouter::build_tool_call(sess.as_ref(), item.clone()) {
                    Ok(Some(call)) => {
                        let payload_preview = call.payload.log_payload().into_owned();
                        tracing::info!("ToolCall: {} {}", call.tool_name, payload_preview);

                        let response =
                            tool_runtime.handle_tool_call(call, cancellation_token.child_token());
                        output.push_back(
                            async move {
                                Ok(ProcessedResponseItem {
                                    item,
                                    response: Some(response.await?),
                                })
                            }
                            .boxed(),
                        );
                    }
                    Ok(None) => {
                        let response = handle_non_tool_response_item(
                            sess.as_ref(),
                            Arc::clone(&turn_context),
                            item.clone(),
                            sess.show_raw_agent_reasoning(),
                        )
                        .await?;
                        add_completed(ProcessedResponseItem { item, response });
                    }
                    Err(FunctionCallError::MissingLocalShellCallId) => {
                        let msg = "LocalShellCall without call_id or id";
                        turn_context
                            .client
                            .get_otel_event_manager()
                            .log_tool_failed("local_shell", msg);
                        error!(msg);
                        let response = ResponseInputItem::FunctionCallOutput {
                            call_id: String::new(),
                            output: FunctionCallOutputPayload {
                                content: msg.to_string(),
[MCP] Render MCP tool call result images to the model (#5600)
It's pretty amazing we have gotten here without the ability for the
model to see image content from MCP tool calls.
This PR builds off of 4391 and fixes #4819. I would like @KKcorps to get
adequate credit here, but I also want to get this fix in ASAP, so I gave
him a week to update it and haven't gotten a response so I'm going to
take it across the finish line.
This test highlights how absurd the current situation is. I asked the
model to read this image using the Chrome MCP
<img width="2378" height="674" alt="image"
src="https://github.com/user-attachments/assets/9ef52608-72a2-4423-9f5e-7ae36b2b56e0"
/>
After this change, it correctly outputs:
> Captured the page: image dhows a dark terminal-style UI labeled
`OpenAI Codex (v0.0.0)` with prompt `model: gpt-5-codex medium` and
working directory `/codex/codex-rs`
(and more)
Before this change, it said:
> Took the full-page screenshot you asked for. It shows a long,
horizontally repeating pattern of stylized people in orange, light-blue,
and mustard clothing, holding hands in alternating poses against a white
background. No text or other graphics-just rows of flat illustration
stretching off to the right.
Without this change, the Figma, Playwright, Chrome, and other visual MCP
servers are pretty much entirely useless.
I tested this change with the OpenAI Responses API as well as a third-party
completions API.
2025-10-27 14:55:57 -07:00
                                ..Default::default()
                            },
                        };
                        add_completed(ProcessedResponseItem {
                            item,
                            response: Some(response),
                        });
                    }
                    Err(FunctionCallError::RespondToModel(message))
                    | Err(FunctionCallError::Denied(message)) => {
                        let response = ResponseInputItem::FunctionCallOutput {
                            call_id: String::new(),
                            output: FunctionCallOutputPayload {
                                content: message,
                                ..Default::default()
                            },
                        };
                        add_completed(ProcessedResponseItem {
                            item,
                            response: Some(response),
                        });
                    }
                    Err(FunctionCallError::Fatal(message)) => {
                        return Err(CodexErr::Fatal(message));
                    }
                }
            }
            ResponseEvent::WebSearchCallBegin { call_id } => {
                let _ = sess
                    .tx_event
                    .send(Event {
                        id: turn_context.sub_id.clone(),
                        msg: EventMsg::WebSearchBegin(WebSearchBeginEvent { call_id }),
                    })
                    .await;
            }
            ResponseEvent::RateLimits(snapshot) => {
                // Update internal state with the latest rate limits, but defer sending until
                // token usage is available to avoid duplicate TokenCount events.
                sess.update_rate_limits(turn_context.as_ref(), snapshot)
                    .await;
            }
feat: show number of tokens remaining in UI (#1388)
When using the OpenAI Responses API, we now record the `usage` field for
a `"response.completed"` event, which includes metrics about the number
of tokens consumed. We also introduce `openai_model_info.rs`, which
includes current data about the most common OpenAI models available via
the API (specifically `context_window` and `max_output_tokens`). If
Codex does not recognize the model, you can set `model_context_window`
and `model_max_output_tokens` explicitly in `config.toml`.
We then introduce a new event type to `protocol.rs`, `TokenCount`,
which includes the `TokenUsage` for the most recent turn.
Finally, we update the TUI to record the running sum of tokens used so
the percentage of available context window remaining can be reported via
the placeholder text for the composer:

We could certainly get much fancier with this (such as reporting the
estimated cost of the conversation), but for now, we are just trying to
achieve feature parity with the TypeScript CLI.
Though arguably this improves upon the TypeScript CLI, as the TypeScript
CLI uses heuristics to estimate the number of tokens used rather than
using the `usage` information directly:
https://github.com/openai/codex/blob/296996d74e345b1b05d8c3451a06ace21c5ada96/codex-cli/src/utils/approximate-tokens-used.ts#L3-L16
Fixes https://github.com/openai/codex/issues/1242
2025-06-25 23:31:11 -07:00
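            // Terminal event for the stream: record token usage, emit any
            // pending turn diff, and return the collected items.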
            ResponseEvent::Completed {
                response_id: _,
                token_usage,
            } => {
                sess.update_token_usage_info(turn_context.as_ref(), token_usage.as_ref())
                    .await;

                let processed_items = output.try_collect().await?;

                let unified_diff = {
                    let mut tracker = turn_diff_tracker.lock().await;
                    tracker.get_unified_diff()
                };
                if let Ok(Some(unified_diff)) = unified_diff {
                    let msg = EventMsg::TurnDiff(TurnDiffEvent { unified_diff });
                    sess.send_event(&turn_context, msg).await;
                }

                let result = TurnRunResult {
                    processed_items,
                    total_token_usage: token_usage.clone(),
                };
                return Ok(result);
            }
            ResponseEvent::OutputTextDelta(delta) => {
                // In review child threads, suppress assistant text deltas; the
                // UI will show a selection popup from the final ReviewOutput.
                if !turn_context.is_review_mode {
                    let event = EventMsg::AgentMessageDelta(AgentMessageDeltaEvent { delta });
                    sess.send_event(&turn_context, event).await;
                } else {
                    trace!("suppressing OutputTextDelta in review mode");
                }
            }
            ResponseEvent::ReasoningSummaryDelta(delta) => {
                let event = EventMsg::AgentReasoningDelta(AgentReasoningDeltaEvent { delta });
                sess.send_event(&turn_context, event).await;
            }
            ResponseEvent::ReasoningSummaryPartAdded => {
                let event =
                    EventMsg::AgentReasoningSectionBreak(AgentReasoningSectionBreakEvent {});
                sess.send_event(&turn_context, event).await;
            }
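            // Raw reasoning content is forwarded only when the user opted in.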
            ResponseEvent::ReasoningContentDelta(delta) => {
                if sess.show_raw_agent_reasoning() {
                    let event = EventMsg::AgentReasoningRawContentDelta(
                        AgentReasoningRawContentDeltaEvent { delta },
                    );
                    sess.send_event(&turn_context, event).await;
                }
            }
        }
    }
}
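
/// Handles a non-tool `ResponseItem` streamed from the model (assistant
/// messages, reasoning, and web-search calls): emits the corresponding turn
/// item to the UI, suppressing assistant messages in review child threads.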
async fn handle_non_tool_response_item(
    sess: &Session,
    turn_context: Arc<TurnContext>,
    item: ResponseItem,
    show_raw_agent_reasoning: bool,
) -> CodexResult<Option<ResponseInputItem>> {
    debug!(?item, "Output item");

    match &item {
        ResponseItem::Message { .. }
        | ResponseItem::Reasoning { .. }
        | ResponseItem::WebSearchCall { .. } => {
            let turn_item = match &item {
                ResponseItem::Message { .. } if turn_context.is_review_mode => {
                    trace!("suppressing assistant Message in review mode");
                    None
                }
                _ => parse_turn_item(&item),
            };
            if let Some(turn_item) = turn_item {
                sess.emit_turn_item_started_completed(
                    turn_context.as_ref(),
                    turn_item,
                    show_raw_agent_reasoning,
                )
                .await;
            }
        }
        ResponseItem::FunctionCallOutput { .. } | ResponseItem::CustomToolCallOutput { .. } => {
            debug!("unexpected tool output from stream");
        }
        _ => {}
    }
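
    // Non-tool items never yield a payload to send back to the model.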
    Ok(None)
}
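
/// Scans the turn's responses in reverse and returns the text of the last
/// assistant message, if any.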
pub(super) fn get_last_assistant_message_from_turn(responses: &[ResponseItem]) -> Option<String> {
    responses.iter().rev().find_map(|item| {
        if let ResponseItem::Message { role, content, .. } = item {
            if role == "assistant" {
                content.iter().rev().find_map(|ci| {
                    if let ContentItem::OutputText { text } = ci {
                        Some(text.clone())
                    } else {
                        None
                    }
                })
            } else {
                None
            }
        } else {
            None
        }
    })
}

/// Emits an ExitedReviewMode Event with optional ReviewOutput,
/// and records a developer message with the review output.
pub(crate) async fn exit_review_mode(
    session: Arc<Session>,
    turn_context: Arc<TurnContext>,
    review_output: Option<ReviewOutputEvent>,
) {
    let event = EventMsg::ExitedReviewMode(ExitedReviewModeEvent {
        review_output: review_output.clone(),
    });
    session.send_event(turn_context.as_ref(), event).await;
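
    // Record a synthetic user message summarizing the review outcome so the
    // parent thread keeps that context after the child review session ends.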
    let mut user_message = String::new();
    if let Some(out) = review_output {
        let mut findings_str = String::new();
        let text = out.overall_explanation.trim();
        if !text.is_empty() {
            findings_str.push_str(text);
        }
        if !out.findings.is_empty() {
            let block = format_review_findings_block(&out.findings, None);
            findings_str.push_str(&format!("\n{block}"));
        }
        user_message.push_str(&format!(
            r#"<user_action>
<context>User initiated a review task. Here's the full review output from reviewer model. User may select one or more comments to resolve.</context>
<action>review</action>
<results>
{findings_str}
</results>
</user_action>
"#));
    } else {
        user_message.push_str(r#"<user_action>
<context>User initiated a review task, but was interrupted. If user asks about this, tell them to re-initiate a review with `/review` and wait for it to complete.</context>
<action>review</action>
<results>
None.
</results>
</user_action>
"#);
    }
    session
        .record_conversation_items(
            &turn_context,
            &[ResponseItem::Message {
                id: None,
                role: "user".to_string(),
                content: vec![ContentItem::InputText { text: user_message }],
            }],
        )
        .await;

    // Make the recorded review note visible immediately for readers.
    session.flush_rollout().await;
}
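
/// Builds the user-facing message shown when an MCP server fails to
/// initialize, special-casing GitHub's OAuth-less endpoint, servers that
/// require `codex mcp login`, and startup timeouts.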
fn mcp_init_error_display(
    server_name: &str,
    entry: Option<&McpAuthStatusEntry>,
    err: &anyhow::Error,
) -> String {
    if let Some(McpServerTransportConfig::StreamableHttp {
        url,
        bearer_token_env_var,
        http_headers,
        ..
    }) = &entry.map(|entry| &entry.config.transport)
        && url == "https://api.githubcopilot.com/mcp/"
        && bearer_token_env_var.is_none()
        && http_headers.as_ref().map(HashMap::is_empty).unwrap_or(true)
    {
        // GitHub only supports OAuth for first-party MCP clients.
        // That means the user has to specify a personal access token, either via
        // bearer_token_env_var or http_headers.
        // https://github.com/github/github-mcp-server/issues/921#issuecomment-3221026448
        format!(
            "GitHub MCP does not support OAuth. Log in by adding a personal access token (https://github.com/settings/personal-access-tokens) to your environment and config.toml:\n[mcp_servers.{server_name}]\nbearer_token_env_var = CODEX_GITHUB_PERSONAL_ACCESS_TOKEN"
        )
    } else if is_mcp_client_auth_required_error(err) {
        format!(
            "The {server_name} MCP server is not logged in. Run `codex mcp login {server_name}`."
        )
    } else if is_mcp_client_startup_timeout_error(err) {
        let startup_timeout_secs = match entry {
            Some(entry) => match entry.config.startup_timeout_sec {
                Some(timeout) => timeout,
                None => DEFAULT_STARTUP_TIMEOUT,
            },
            None => DEFAULT_STARTUP_TIMEOUT,
        }
        .as_secs();
        format!(
            "MCP client for `{server_name}` timed out after {startup_timeout_secs} seconds. Add or adjust `startup_timeout_sec` in your config.toml:\n[mcp_servers.{server_name}]\nstartup_timeout_sec = XX"
        )
    } else {
        format!("MCP client for `{server_name}` failed to start: {err:#}")
    }
}

fn is_mcp_client_auth_required_error(error: &anyhow::Error) -> bool {
    // StreamableHttpError::AuthRequired from the MCP SDK.
    error.to_string().contains("Auth required")
}
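
// Startup timeouts surface either as a generic "request timed out" RPC error
// or as a handshake timeout from the MCP client, so check for both substrings.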
fn is_mcp_client_startup_timeout_error(error: &anyhow::Error) -> bool {
    let error_message = error.to_string();
    error_message.contains("request timed out")
        || error_message.contains("timed out handshaking with MCP server")
}

use crate::features::Feature;
use crate::features::Features;
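
// Re-export the test-only session factory so sibling modules' unit tests can
// build a `Session` and `TurnContext` without duplicating setup.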
#[cfg(test)]
pub(crate) use tests::make_session_and_context;

#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::ConfigOverrides;
    use crate::config::ConfigToml;
    use crate::config_types::McpServerConfig;
    use crate::config_types::McpServerTransportConfig;
    use crate::exec::ExecToolCallOutput;
    use crate::mcp::auth::McpAuthStatusEntry;
    use crate::tools::format_exec_output_str;
    use crate::protocol::CompactedItem;
    use crate::protocol::InitialHistory;
    use crate::protocol::ResumedHistory;
    use crate::state::TaskKind;
    use crate::tasks::SessionTask;
    use crate::tasks::SessionTaskContext;
    use crate::tools::ToolRouter;
    use crate::tools::context::ToolInvocation;
    use crate::tools::context::ToolOutput;
    use crate::tools::context::ToolPayload;
    use crate::tools::handlers::ShellHandler;
    use crate::tools::registry::ToolHandler;
    use crate::turn_diff_tracker::TurnDiffTracker;
    use codex_app_server_protocol::AuthMode;
    use codex_protocol::models::ContentItem;
    use codex_protocol::models::ResponseItem;
    use codex_protocol::protocol::McpAuthStatus;
    use std::time::Duration;
    use tokio::time::sleep;
    use mcp_types::ContentBlock;
    use mcp_types::TextContent;
    use pretty_assertions::assert_eq;
    use serde::Deserialize;
    use serde_json::json;
    use std::path::PathBuf;
    use std::sync::Arc;
    use std::time::Duration as StdDuration;

    #[test]
    fn reconstruct_history_matches_live_compactions() {
        let (session, turn_context) = make_session_and_context();
        let (rollout_items, expected) = sample_rollout(&session, &turn_context);
        let reconstructed =
            session.reconstruct_history_from_rollout(&turn_context, &rollout_items);
        assert_eq!(expected, reconstructed);
    }

    #[test]
    fn record_initial_history_reconstructs_resumed_transcript() {
        let (session, turn_context) = make_session_and_context();
        let (rollout_items, expected) = sample_rollout(&session, &turn_context);
        tokio_test::block_on(session.record_initial_history(InitialHistory::Resumed(
            ResumedHistory {
                conversation_id: ConversationId::default(),
                history: rollout_items,
                rollout_path: PathBuf::from("/tmp/resume.jsonl"),
            },
        )));

        let actual = tokio_test::block_on(async {
            session.state.lock().await.clone_history().get_history()
        });
        assert_eq!(expected, actual);
    }

    #[test]
    fn record_initial_history_reconstructs_forked_transcript() {
        let (session, turn_context) = make_session_and_context();
        let (rollout_items, expected) = sample_rollout(&session, &turn_context);
        tokio_test::block_on(session.record_initial_history(InitialHistory::Forked(rollout_items)));

        let actual = tokio_test::block_on(async {
            session.state.lock().await.clone_history().get_history()
        });
        assert_eq!(expected, actual);
    }
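
    // A minimal illustrative test (a sketch, not part of the original suite)
    // of get_last_assistant_message_from_turn: the scan runs in reverse, so
    // the text of the final assistant message wins over earlier ones.
    #[test]
    fn last_assistant_message_prefers_latest_text() {
        let items = vec![
            ResponseItem::Message {
                id: None,
                role: "assistant".to_string(),
                content: vec![ContentItem::OutputText {
                    text: "first".to_string(),
                }],
            },
            ResponseItem::Message {
                id: None,
                role: "assistant".to_string(),
                content: vec![ContentItem::OutputText {
                    text: "second".to_string(),
                }],
            },
        ];
        assert_eq!(
            get_last_assistant_message_from_turn(&items),
            Some("second".to_string())
        );
    }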

    #[test]
    fn prefers_structured_content_when_present() {
        let ctr = CallToolResult {
            // Content present but should be ignored because structured_content is set.
            content: vec![text_block("ignored")],
            is_error: None,
            structured_content: Some(json!({
                "ok": true,
                "value": 42
            })),
        };
[MCP] Render MCP tool call result images to the model (#5600)
It's pretty amazing we have gotten here without the ability for the model to see image content from MCP tool calls.
This PR builds off of #4391 and fixes #4819. I would like @KKcorps to get adequate credit here, but I also want to get this fix in ASAP; I gave him a week to update it and haven't gotten a response, so I'm taking it across the finish line.
This test highlights how absurd the current situation is. I asked the model to read this image using the Chrome MCP:
<img width="2378" height="674" alt="image" src="https://github.com/user-attachments/assets/9ef52608-72a2-4423-9f5e-7ae36b2b56e0" />
After this change, it correctly outputs:
> Captured the page: image shows a dark terminal-style UI labeled `OpenAI Codex (v0.0.0)` with prompt `model: gpt-5-codex medium` and working directory `/codex/codex-rs`
(and more)
Before this change, it said:
> Took the full-page screenshot you asked for. It shows a long, horizontally repeating pattern of stylized people in orange, light-blue, and mustard clothing, holding hands in alternating poses against a white background. No text or other graphics, just rows of flat illustration stretching off to the right.
Without this change, the Figma, Playwright, Chrome, and other visual MCP servers are pretty much entirely useless.
I tested this change with the OpenAI Responses API as well as a third-party Completions API.
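    // The conversions exercised below follow from this change: when a
    // CallToolResult carries non-null structured_content, that JSON is
    // serialized as the payload; otherwise the content blocks are serialized.
    // The is_error flag (absent meaning "no error") maps onto `success`.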
    let got = FunctionCallOutputPayload::from(&ctr);

    let expected = FunctionCallOutputPayload {
        content: serde_json::to_string(&json!({
            "ok": true,
            "value": 42
        }))
        .unwrap(),
        success: Some(true),
        ..Default::default()
    };
    assert_eq!(expected, got);
}
#[test]
fn includes_timed_out_message() {
    let exec = ExecToolCallOutput {
        exit_code: 0,
        stdout: StreamOutput::new(String::new()),
        stderr: StreamOutput::new(String::new()),
        aggregated_output: StreamOutput::new("Command output".to_string()),
        duration: StdDuration::from_secs(1),
        timed_out: true,
    };
    let out = format_exec_output_str(&exec);
    assert_eq!(
        out,
        "command timed out after 1000 milliseconds\nCommand output"
    );
}
#[test]
fn falls_back_to_content_when_structured_is_null() {
    let ctr = CallToolResult {
        content: vec![text_block("hello"), text_block("world")],
        is_error: None,
        structured_content: Some(serde_json::Value::Null),
    };
    let got = FunctionCallOutputPayload::from(&ctr);

    let expected = FunctionCallOutputPayload {
        content: serde_json::to_string(&vec![text_block("hello"), text_block("world")])
            .unwrap(),
        success: Some(true),
        ..Default::default()
    };
    assert_eq!(expected, got);
}

#[test]
fn success_flag_reflects_is_error_true() {
    let ctr = CallToolResult {
        content: vec![text_block("unused")],
        is_error: Some(true),
        structured_content: Some(json!({ "message": "bad" })),
    };
    let got = FunctionCallOutputPayload::from(&ctr);

    let expected = FunctionCallOutputPayload {
        content: serde_json::to_string(&json!({ "message": "bad" })).unwrap(),
        success: Some(false),
        ..Default::default()
    };
    assert_eq!(expected, got);
}

#[test]
fn success_flag_true_with_no_error_and_content_used() {
    let ctr = CallToolResult {
        content: vec![text_block("alpha")],
        is_error: Some(false),
        structured_content: None,
    };
    let got = FunctionCallOutputPayload::from(&ctr);

    let expected = FunctionCallOutputPayload {
        content: serde_json::to_string(&vec![text_block("alpha")]).unwrap(),
        success: Some(true),
        ..Default::default()
    };
    assert_eq!(expected, got);
}

fn text_block(s: &str) -> ContentBlock {
    ContentBlock::TextContent(TextContent {
        annotations: None,
        text: s.to_string(),
        r#type: "text".to_string(),
    })
}
fn otel_event_manager(conversation_id: ConversationId, config: &Config) -> OtelEventManager {
    OtelEventManager::new(
        conversation_id,
        config.model.as_str(),
        config.model_family.slug.as_str(),
        None,
        Some("test@test.com".to_string()),
        Some(AuthMode::ChatGPT),
        false,
        "test".to_string(),
    )
}
pub(crate) fn make_session_and_context() -> (Session, TurnContext) {
    let (tx_event, _rx_event) = async_channel::unbounded();
    let codex_home = tempfile::tempdir().expect("create temp dir");
    let config = Config::load_from_base_config_with_overrides(
        ConfigToml::default(),
        ConfigOverrides::default(),
        codex_home.path().to_path_buf(),
    )
    .expect("load default test config");
    let config = Arc::new(config);
    let conversation_id = ConversationId::default();
    let otel_event_manager = otel_event_manager(conversation_id, config.as_ref());

    let auth_manager = AuthManager::shared(
        config.cwd.clone(),
        false,
        config.cli_auth_credentials_store_mode,
    );

    let session_configuration = SessionConfiguration {
        provider: config.model_provider.clone(),
        model: config.model.clone(),
        model_reasoning_effort: config.model_reasoning_effort,
        model_reasoning_summary: config.model_reasoning_summary,
        user_instructions: config.user_instructions.clone(),
        base_instructions: config.base_instructions.clone(),
        approval_policy: config.approval_policy,
        sandbox_policy: config.sandbox_policy.clone(),
        cwd: config.cwd.clone(),
        original_config_do_not_use: Arc::clone(&config),
        features: Features::default(),
    };

    let state = SessionState::new(session_configuration.clone());
ref: full state refactor (#4174)
## Current State Observations
- `Session` currently holds many unrelated responsibilities (history, approval queues, task handles, rollout recorder, shell discovery, token tracking, etc.), making it hard to reason about ownership and lifetimes.
- The anonymous `State` struct inside `codex.rs` mixes session-long data with turn-scoped queues and approval bookkeeping.
- Turn execution (`run_task`) relies on ad-hoc local variables that should conceptually belong to a per-turn state object.
- External modules (`codex::compact`, tests) frequently poke the raw `Session.state` mutex, which couples them to implementation details.
- Interrupts, approvals, and rollout persistence all have bespoke cleanup paths, contributing to subtle bugs when a turn is aborted mid-flight.
## Desired End State
- Keep a slim `Session` object that acts as the orchestrator and façade. It should expose a focused API (submit, approvals, interrupts, event emission) without storing unrelated fields directly.
- Introduce a `state` module that encapsulates all mutable data structures:
  - `SessionState`: session-persistent data (history, approved commands, token/rate-limit info, maybe user preferences).
  - `ActiveTurn`: metadata for the currently running turn (sub-id, task kind, abort handle) and an `Arc<TurnState>`.
  - `TurnState`: all turn-scoped pieces (pending inputs, approval waiters, diff tracker, review history, auto-compact flags, last agent message, outstanding tool call bookkeeping).
- Group long-lived helpers/managers into a dedicated `SessionServices` struct so `Session` does not accumulate "random" fields.
- Provide clear, lock-safe APIs so other modules never touch raw mutexes.
- Ensure every turn creates/drops a `TurnState` and that interrupts/finishes delegate cleanup to it.
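    // Sketch of how the pieces described in the refactor notes fit together
    // (illustrative, based only on the notes above): SessionState, constructed
    // just above, keeps session-long data such as history and approved
    // commands; ActiveTurn tracks the running turn and shares an
    // Arc<TurnState> of turn-scoped bookkeeping; SessionServices, built next,
    // groups long-lived helpers so Session itself stays a slim orchestrator.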
    let services = SessionServices {
        mcp_connection_manager: McpConnectionManager::default(),
        unified_exec_manager: UnifiedExecSessionManager::default(),
        notifier: UserNotifier::new(None),
        rollout: Mutex::new(None),
        user_shell: shell::Shell::Unknown,
        show_raw_agent_reasoning: config.show_raw_agent_reasoning,
        auth_manager: Arc::clone(&auth_manager),
        otel_event_manager: otel_event_manager.clone(),
        tool_approvals: Mutex::new(ApprovalStore::default()),
    };

    let turn_context = Session::make_turn_context(
        Some(Arc::clone(&auth_manager)),
        &otel_event_manager,
        session_configuration.provider.clone(),
        &session_configuration,
        conversation_id,
        "turn_id".to_string(),
    );
    let session = Session {
        conversation_id,
        tx_event,
        state: Mutex::new(state),
        active_turn: Mutex::new(None),
        services,
        next_internal_sub_id: AtomicU64::new(0),
    };

    (session, turn_context)
}
// Like make_session_and_context, but returns Arc<Session> and the event receiver
// so tests can assert on emitted events.
fn make_session_and_context_with_rx() -> (
    Arc<Session>,
    Arc<TurnContext>,
    async_channel::Receiver<Event>,
) {
    let (tx_event, rx_event) = async_channel::unbounded();
    let codex_home = tempfile::tempdir().expect("create temp dir");
    let config = Config::load_from_base_config_with_overrides(
        ConfigToml::default(),
        ConfigOverrides::default(),
        codex_home.path().to_path_buf(),
    )
    .expect("load default test config");
    let config = Arc::new(config);
    let conversation_id = ConversationId::default();
    let otel_event_manager = otel_event_manager(conversation_id, config.as_ref());

    let auth_manager = AuthManager::shared(
        config.cwd.clone(),
        false,
        config.cli_auth_credentials_store_mode,
    );

    let session_configuration = SessionConfiguration {
        provider: config.model_provider.clone(),
        model: config.model.clone(),
        model_reasoning_effort: config.model_reasoning_effort,
        model_reasoning_summary: config.model_reasoning_summary,
        user_instructions: config.user_instructions.clone(),
        base_instructions: config.base_instructions.clone(),
        approval_policy: config.approval_policy,
        sandbox_policy: config.sandbox_policy.clone(),
        cwd: config.cwd.clone(),
        original_config_do_not_use: Arc::clone(&config),
        features: Features::default(),
    };
    let state = SessionState::new(session_configuration.clone());
    let services = SessionServices {
        mcp_connection_manager: McpConnectionManager::default(),
        unified_exec_manager: UnifiedExecSessionManager::default(),
        notifier: UserNotifier::new(None),
        rollout: Mutex::new(None),
        user_shell: shell::Shell::Unknown,
        show_raw_agent_reasoning: config.show_raw_agent_reasoning,
        auth_manager: Arc::clone(&auth_manager),
        otel_event_manager: otel_event_manager.clone(),
        tool_approvals: Mutex::new(ApprovalStore::default()),
    };

    let turn_context = Arc::new(Session::make_turn_context(
        Some(Arc::clone(&auth_manager)),
        &otel_event_manager,
        session_configuration.provider.clone(),
        &session_configuration,
        conversation_id,
        "turn_id".to_string(),
    ));

    let session = Arc::new(Session {
        conversation_id,
        tx_event,
        state: Mutex::new(state),
        active_turn: Mutex::new(None),
        services,
        next_internal_sub_id: AtomicU64::new(0),
    });

    (session, turn_context, rx_event)
}
#[derive(Clone, Copy)]
struct NeverEndingTask {
    kind: TaskKind,
    listen_to_cancellation_token: bool,
}
#[async_trait::async_trait]
impl SessionTask for NeverEndingTask {
    fn kind(&self) -> TaskKind {
        self.kind
    }

    async fn run(
        self: Arc<Self>,
        _session: Arc<SessionTaskContext>,
        _ctx: Arc<TurnContext>,
        _input: Vec<UserInput>,
        cancellation_token: CancellationToken,
    ) -> Option<String> {
        if self.listen_to_cancellation_token {
            cancellation_token.cancelled().await;
            return None;
        }
        loop {
            sleep(Duration::from_secs(60)).await;
        }
    }

    async fn abort(&self, session: Arc<SessionTaskContext>, ctx: Arc<TurnContext>) {
        if let TaskKind::Review = self.kind {
            exit_review_mode(session.clone_session(), ctx, None).await;
        }
    }
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_log::test]
async fn abort_regular_task_emits_turn_aborted_only() {
    let (sess, tc, rx) = make_session_and_context_with_rx();
    let input = vec![UserInput::Text {
        text: "hello".to_string(),
    }];
    sess.spawn_task(
        Arc::clone(&tc),
        input,
        NeverEndingTask {
            kind: TaskKind::Regular,
            listen_to_cancellation_token: false,
        },
    )
    .await;
    sess.abort_all_tasks(TurnAbortReason::Interrupted).await;
    let evt = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv())
        .await
        .expect("timeout waiting for event")
        .expect("event");
    match evt.msg {
        EventMsg::TurnAborted(e) => assert_eq!(TurnAbortReason::Interrupted, e.reason),
        other => panic!("unexpected event: {other:?}"),
    }
    assert!(rx.try_recv().is_err());
}
#[tokio::test]
async fn abort_gracefully_emits_turn_aborted_only() {
    let (sess, tc, rx) = make_session_and_context_with_rx();
    let input = vec![UserInput::Text {
        text: "hello".to_string(),
    }];
    sess.spawn_task(
        Arc::clone(&tc),
        input,
        NeverEndingTask {
            kind: TaskKind::Regular,
            listen_to_cancellation_token: true,
        },
    )
    .await;
    sess.abort_all_tasks(TurnAbortReason::Interrupted).await;
    let evt = rx.recv().await.expect("event");
    match evt.msg {
        EventMsg::TurnAborted(e) => assert_eq!(TurnAbortReason::Interrupted, e.reason),
        other => panic!("unexpected event: {other:?}"),
    }
    assert!(rx.try_recv().is_err());
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn abort_review_task_emits_exited_then_aborted_and_records_history() {
    let (sess, tc, rx) = make_session_and_context_with_rx();
    let input = vec![UserInput::Text {
        text: "start review".to_string(),
    }];
    sess.spawn_task(
        Arc::clone(&tc),
        input,
        NeverEndingTask {
            kind: TaskKind::Review,
            listen_to_cancellation_token: false,
        },
    )
    .await;
    sess.abort_all_tasks(TurnAbortReason::Interrupted).await;
    let first = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv())
        .await
        .expect("timeout waiting for first event")
        .expect("first event");
    match first.msg {
        EventMsg::ExitedReviewMode(ev) => assert!(ev.review_output.is_none()),
        other => panic!("unexpected first event: {other:?}"),
    }
    loop {
        let evt = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv())
            .await
            .expect("timeout waiting for next event")
            .expect("event");
        match evt.msg {
            EventMsg::RawResponseItem(_) => continue,
            EventMsg::TurnAborted(e) => {
                assert_eq!(TurnAbortReason::Interrupted, e.reason);
                break;
            }
            other => panic!("unexpected second event: {other:?}"),
        }
    }

    let history = sess.clone_history().await.get_history();
    let found = history.iter().any(|item| match item {
        ResponseItem::Message { role, content, .. } if role == "user" => {
            content.iter().any(|ci| match ci {
                ContentItem::InputText { text } => {
                    text.contains("<user_action>")
                        && text.contains("review")
                        && text.contains("interrupted")
                }
                _ => false,
            })
        }
        _ => false,
    });
    assert!(
        found,
        "synthetic review interruption not recorded in history"
    );
}
chore: refactor tool handling (#4510)
# Tool System Refactor
- Centralizes tool definitions and execution in `core/src/tools/*`: specs (`spec.rs`), handlers (`handlers/*`), router (`router.rs`), registry/dispatch (`registry.rs`), and shared context (`context.rs`). One registry now builds the model-visible tool list and binds handlers.
- Router converts model responses to tool calls; Registry dispatches with consistent telemetry via `codex-rs/otel` and unified error handling. Function, Local Shell, MCP, and experimental `unified_exec` all flow through this path; legacy shell aliases still work.
- Rationale: reduce per‑tool boilerplate, keep spec/handler in sync, and make adding tools predictable and testable.

Example: `read_file`
- Spec: `core/src/tools/spec.rs` (see `create_read_file_tool`, registered by `build_specs`).
- Handler: `core/src/tools/handlers/read_file.rs` (absolute `file_path`, 1‑indexed `offset`, `limit`, `L#: ` prefixes, safe truncation).
- E2E test: `core/tests/suite/read_file.rs` validates the tool returns the requested lines.
## Next steps
- Decompose `handle_container_exec_with_params`
- Add parallel tool calls
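A minimal, self-contained sketch of the spec/handler/registry split these notes describe; the toy `ToolSpec`, `ToolHandler`, and `ToolRegistry` types here are illustrative stand-ins, not the crate's actual API (the real router and registry live in `core/src/tools/*`):

```rust
use std::collections::HashMap;

// Model-visible description of a tool (toy stand-in for the real spec type).
struct ToolSpec {
    name: &'static str,
    description: &'static str,
}

// One handler per tool; the registry binds specs to handlers.
trait ToolHandler {
    fn handle(&self, arguments: &str) -> Result<String, String>;
}

struct EchoHandler;

impl ToolHandler for EchoHandler {
    fn handle(&self, arguments: &str) -> Result<String, String> {
        Ok(format!("echo: {arguments}"))
    }
}

// The registry builds the tool list once and dispatches calls by name,
// so adding a tool means adding one spec and one handler.
struct ToolRegistry {
    specs: Vec<ToolSpec>,
    handlers: HashMap<&'static str, Box<dyn ToolHandler>>,
}

impl ToolRegistry {
    fn dispatch(&self, name: &str, arguments: &str) -> Result<String, String> {
        let handler = self
            .handlers
            .get(name)
            .ok_or_else(|| format!("unknown tool: {name}"))?;
        handler.handle(arguments)
    }
}

fn main() {
    let mut handlers: HashMap<&'static str, Box<dyn ToolHandler>> = HashMap::new();
    handlers.insert("echo", Box::new(EchoHandler));
    let registry = ToolRegistry {
        specs: vec![ToolSpec {
            name: "echo",
            description: "echoes its arguments back",
        }],
        handlers,
    };
    for spec in &registry.specs {
        println!("tool {}: {}", spec.name, spec.description);
    }
    assert_eq!(registry.dispatch("echo", "hi"), Ok("echo: hi".to_string()));
    assert!(registry.dispatch("missing", "{}").is_err());
}
```

The test below drives the real `ToolRouter` through this same path and asserts that an incompatible payload surfaces as a fatal error.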
#[tokio::test]
async fn fatal_tool_error_stops_turn_and_reports_error() {
    let (session, turn_context, _rx) = make_session_and_context_with_rx();
    let router = ToolRouter::from_config(
        &turn_context.tools_config,
        Some(session.services.mcp_connection_manager.list_all_tools()),
    );
    let item = ResponseItem::CustomToolCall {
        id: None,
        status: None,
        call_id: "call-1".to_string(),
        name: "shell".to_string(),
        input: "{}".to_string(),
    };

    let call = ToolRouter::build_tool_call(session.as_ref(), item.clone())
        .expect("build tool call")
        .expect("tool call present");
    let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
    let err = router
        .dispatch_tool_call(
            Arc::clone(&session),
            Arc::clone(&turn_context),
            tracker,
            call,
        )
        .await
        .expect_err("expected fatal error");
    match err {
        FunctionCallError::Fatal(message) => {
            assert_eq!(message, "tool shell invoked with incompatible payload");
        }
        other => panic!("expected FunctionCallError::Fatal, got {other:?}"),
    }
}
fn sample_rollout(
    session: &Session,
    turn_context: &TurnContext,
) -> (Vec<RolloutItem>, Vec<ResponseItem>) {
    let mut rollout_items = Vec::new();
    let mut live_history = ConversationHistory::new();

    // Seed both the rollout and the live history with the initial context.
    let initial_context = session.build_initial_context(turn_context);
    for item in &initial_context {
        rollout_items.push(RolloutItem::ResponseItem(item.clone()));
    }
    live_history.record_items(initial_context.iter());

    // First user/assistant exchange.
    let user1 = ResponseItem::Message {
        id: None,
        role: "user".to_string(),
        content: vec![ContentItem::InputText {
            text: "first user".to_string(),
        }],
    };
    live_history.record_items(std::iter::once(&user1));
    rollout_items.push(RolloutItem::ResponseItem(user1.clone()));

    let assistant1 = ResponseItem::Message {
        id: None,
        role: "assistant".to_string(),
        content: vec![ContentItem::OutputText {
            text: "assistant reply one".to_string(),
        }],
    };
    live_history.record_items(std::iter::once(&assistant1));
    rollout_items.push(RolloutItem::ResponseItem(assistant1.clone()));

    // First compaction: rebuild the live history from the initial context,
    // the user messages seen so far, and a summary of the dropped turns.
    let summary1 = "summary one";
    let snapshot1 = live_history.get_history();
    let user_messages1 = collect_user_messages(&snapshot1);
    let rebuilt1 = build_compacted_history(
        session.build_initial_context(turn_context),
        &user_messages1,
        summary1,
    );
    live_history.replace(rebuilt1);
    rollout_items.push(RolloutItem::Compacted(CompactedItem {
        message: summary1.to_string(),
    }));

    // Second exchange, followed by a second compaction.
    let user2 = ResponseItem::Message {
        id: None,
        role: "user".to_string(),
        content: vec![ContentItem::InputText {
            text: "second user".to_string(),
        }],
    };
    live_history.record_items(std::iter::once(&user2));
    rollout_items.push(RolloutItem::ResponseItem(user2.clone()));

    let assistant2 = ResponseItem::Message {
        id: None,
        role: "assistant".to_string(),
        content: vec![ContentItem::OutputText {
            text: "assistant reply two".to_string(),
        }],
    };
    live_history.record_items(std::iter::once(&assistant2));
    rollout_items.push(RolloutItem::ResponseItem(assistant2.clone()));

    let summary2 = "summary two";
    let snapshot2 = live_history.get_history();
    let user_messages2 = collect_user_messages(&snapshot2);
    let rebuilt2 = build_compacted_history(
        session.build_initial_context(turn_context),
        &user_messages2,
        summary2,
    );
    live_history.replace(rebuilt2);
    rollout_items.push(RolloutItem::Compacted(CompactedItem {
        message: summary2.to_string(),
    }));

    // Third exchange with no trailing compaction.
    let user3 = ResponseItem::Message {
        id: None,
        role: "user".to_string(),
        content: vec![ContentItem::InputText {
            text: "third user".to_string(),
        }],
    };
    live_history.record_items(std::iter::once(&user3));
    rollout_items.push(RolloutItem::ResponseItem(user3.clone()));

    let assistant3 = ResponseItem::Message {
        id: None,
        role: "assistant".to_string(),
        content: vec![ContentItem::OutputText {
            text: "assistant reply three".to_string(),
        }],
    };
    live_history.record_items(std::iter::once(&assistant3));
    rollout_items.push(RolloutItem::ResponseItem(assistant3.clone()));

    (rollout_items, live_history.get_history())
}
#[tokio::test]
async fn rejects_escalated_permissions_when_policy_not_on_request() {
    use crate::exec::ExecParams;
    use crate::protocol::AskForApproval;
    use crate::protocol::SandboxPolicy;
    use crate::turn_diff_tracker::TurnDiffTracker;
    use std::collections::HashMap;

    let (session, mut turn_context_raw) = make_session_and_context();
    // Ensure the policy is NOT OnRequest so the early rejection path triggers.
    turn_context_raw.approval_policy = AskForApproval::OnFailure;
    let session = Arc::new(session);
    let mut turn_context = Arc::new(turn_context_raw);

    let params = ExecParams {
        command: if cfg!(windows) {
            vec![
                "cmd.exe".to_string(),
                "/C".to_string(),
                "echo hi".to_string(),
            ]
        } else {
            vec![
                "/bin/sh".to_string(),
                "-c".to_string(),
                "echo hi".to_string(),
            ]
        },
        cwd: turn_context.cwd.clone(),
        timeout_ms: Some(1000),
        env: HashMap::new(),
        with_escalated_permissions: Some(true),
        justification: Some("test".to_string()),
        arg0: None,
    };
    // Identical command, but without the escalated-permissions request.
    let params2 = ExecParams {
        with_escalated_permissions: Some(false),
        ..params.clone()
    };

    let turn_diff_tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
    let tool_name = "shell";
    let call_id = "test-call".to_string();

    let handler = ShellHandler;
    let resp = handler
        .handle(ToolInvocation {
            session: Arc::clone(&session),
            turn: Arc::clone(&turn_context),
            tracker: Arc::clone(&turn_diff_tracker),
            call_id,
            tool_name: tool_name.to_string(),
            payload: ToolPayload::Function {
                arguments: serde_json::json!({
                    "command": params.command.clone(),
                    "workdir": Some(turn_context.cwd.to_string_lossy().to_string()),
                    "timeout_ms": params.timeout_ms,
                    "with_escalated_permissions": params.with_escalated_permissions,
                    "justification": params.justification.clone(),
                })
                .to_string(),
            },
        })
        .await;

    // The escalated request must be rejected before any command runs.
    let Err(FunctionCallError::RespondToModel(output)) = resp else {
        panic!("expected error result");
    };
    let expected = format!(
        "approval policy is {policy:?}; reject command — you should not ask for escalated permissions if the approval policy is {policy:?}",
        policy = turn_context.approval_policy
    );
    pretty_assertions::assert_eq!(output, expected);

    // Now retry the same command WITHOUT escalated permissions; should succeed.
    // Force DangerFullAccess to avoid platform sandbox dependencies in tests.
    Arc::get_mut(&mut turn_context)
        .expect("unique turn context Arc")
        .sandbox_policy = SandboxPolicy::DangerFullAccess;

    let resp2 = handler
        .handle(ToolInvocation {
            session: Arc::clone(&session),
            turn: Arc::clone(&turn_context),
            tracker: Arc::clone(&turn_diff_tracker),
            call_id: "test-call-2".to_string(),
            tool_name: tool_name.to_string(),
            payload: ToolPayload::Function {
                arguments: serde_json::json!({
                    "command": params2.command.clone(),
                    "workdir": Some(turn_context.cwd.to_string_lossy().to_string()),
                    "timeout_ms": params2.timeout_ms,
                    "with_escalated_permissions": params2.with_escalated_permissions,
                    "justification": params2.justification.clone(),
                })
                .to_string(),
            },
        })
        .await;

    let output = match resp2.expect("expected Ok result") {
        ToolOutput::Function { content, .. } => content,
        _ => panic!("unexpected tool output"),
    };

    // The shell tool reports exec results as JSON; deserialize and verify
    // a zero exit code plus the echoed text.
    #[derive(Deserialize, PartialEq, Eq, Debug)]
    struct ResponseExecMetadata {
        exit_code: i32,
    }
    #[derive(Deserialize)]
    struct ResponseExecOutput {
        output: String,
        metadata: ResponseExecMetadata,
    }
    let exec_output: ResponseExecOutput =
        serde_json::from_str(&output).expect("valid exec output json");
    pretty_assertions::assert_eq!(exec_output.metadata, ResponseExecMetadata { exit_code: 0 });
    assert!(exec_output.output.contains("hi"));
}
#[test]
fn mcp_init_error_display_prompts_for_github_pat() {
    let server_name = "github";
    let entry = McpAuthStatusEntry {
        config: McpServerConfig {
            transport: McpServerTransportConfig::StreamableHttp {
                url: "https://api.githubcopilot.com/mcp/".to_string(),
                bearer_token_env_var: None,
                http_headers: None,
                env_http_headers: None,
            },
            enabled: true,
            startup_timeout_sec: None,
            tool_timeout_sec: None,
            enabled_tools: None,
            disabled_tools: None,
        },
        auth_status: McpAuthStatus::Unsupported,
    };
    let err = anyhow::anyhow!("OAuth is unsupported");
    let display = mcp_init_error_display(server_name, Some(&entry), &err);
    let expected = format!(
        "GitHub MCP does not support OAuth. Log in by adding a personal access token (https://github.com/settings/personal-access-tokens) to your environment and config.toml:\n[mcp_servers.{server_name}]\nbearer_token_env_var = CODEX_GITHUB_PERSONAL_ACCESS_TOKEN"
    );
    assert_eq!(expected, display);
}

#[test]
fn mcp_init_error_display_prompts_for_login_when_auth_required() {
    let server_name = "example";
    let err = anyhow::anyhow!("Auth required for server");
    let display = mcp_init_error_display(server_name, None, &err);
    let expected = format!(
        "The {server_name} MCP server is not logged in. Run `codex mcp login {server_name}`."
    );
    assert_eq!(expected, display);
}

#[test]
fn mcp_init_error_display_reports_generic_errors() {
    let server_name = "custom";
    let entry = McpAuthStatusEntry {
        config: McpServerConfig {
            transport: McpServerTransportConfig::StreamableHttp {
                url: "https://example.com".to_string(),
                bearer_token_env_var: Some("TOKEN".to_string()),
                http_headers: None,
                env_http_headers: None,
            },
            enabled: true,
            startup_timeout_sec: None,
            tool_timeout_sec: None,
            enabled_tools: None,
            disabled_tools: None,
        },
        auth_status: McpAuthStatus::Unsupported,
    };
    let err = anyhow::anyhow!("boom");
    let display = mcp_init_error_display(server_name, Some(&entry), &err);
    let expected = format!("MCP client for `{server_name}` failed to start: {err:#}");
    assert_eq!(expected, display);
}

#[test]
fn mcp_init_error_display_includes_startup_timeout_hint() {
    let server_name = "slow";
    let err = anyhow::anyhow!("request timed out");
    let display = mcp_init_error_display(server_name, None, &err);
    assert_eq!(
        "MCP client for `slow` timed out after 10 seconds. Add or adjust `startup_timeout_sec` in your config.toml:\n[mcp_servers.slow]\nstartup_timeout_sec = XX",
        display
    );
}
}