Add an experimental plan tool (#1726)

This adds a tool the model can call to update a plan. The tool doesn't
actually _do_ anything, but it gives clients a chance to read and render
the structured plan. We will likely iterate on the prompt and tools
exposed for planning over time.
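
For illustration, a single update_plan call might carry arguments like the following (a hypothetical payload matching the UpdatePlanArgs schema introduced in plan_tool.rs below; the step text mirrors the example in the tool description):

use serde_json::json;

// Hypothetical arguments for one `update_plan` call. Status values are
// snake_case ("pending", "in_progress", "completed") per StepStatus below.
let args = json!({
    "explanation": "Starting with exploration before implementing.",
    "plan": [
        { "step": "Explore the codebase to find relevant files", "status": "in_progress" },
        { "step": "Implement the feature in the XYZ component", "status": "pending" },
        { "step": "Commit changes and make a pull request", "status": "pending" }
    ]
});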
Gabriel Peal, 2025-07-29 11:22:02 -07:00 (committed by GitHub)
parent f8fcaaaf6f, commit 8828f6f082
14 changed files with 184 additions and 10 deletions


@@ -30,6 +30,7 @@ use crate::util::backoff;
 pub(crate) async fn stream_chat_completions(
     prompt: &Prompt,
     model: &str,
+    include_plan_tool: bool,
     client: &reqwest::Client,
     provider: &ModelProviderInfo,
 ) -> Result<ResponseStream> {
@@ -105,7 +106,7 @@ pub(crate) async fn stream_chat_completions(
         }
     }

-    let tools_json = create_tools_json_for_chat_completions_api(prompt, model)?;
+    let tools_json = create_tools_json_for_chat_completions_api(prompt, model, include_plan_tool)?;
     let payload = json!({
         "model": model,
         "messages": messages,


@@ -77,6 +77,7 @@ impl ModelClient {
         let response_stream = stream_chat_completions(
             prompt,
             &self.config.model,
+            self.config.include_plan_tool,
             &self.client,
             &self.provider,
         )
@@ -115,7 +116,11 @@ impl ModelClient {
        }

         let full_instructions = prompt.get_full_instructions(&self.config.model);
-        let tools_json = create_tools_json_for_responses_api(prompt, &self.config.model)?;
+        let tools_json = create_tools_json_for_responses_api(
+            prompt,
+            &self.config.model,
+            self.config.include_plan_tool,
+        )?;
         let reasoning = create_reasoning_param_for_request(&self.config, self.effort, self.summary);

         // Request encrypted COT if we are not storing responses,


@@ -55,6 +55,7 @@ use crate::models::ReasoningItemReasoningSummary;
 use crate::models::ResponseInputItem;
 use crate::models::ResponseItem;
 use crate::models::ShellToolCallParams;
+use crate::plan_tool::handle_update_plan;
 use crate::project_doc::get_user_instructions;
 use crate::protocol::AgentMessageDeltaEvent;
 use crate::protocol::AgentMessageEvent;
@@ -1336,6 +1337,7 @@ async fn handle_function_call(
             };
             handle_container_exec_with_params(params, sess, sub_id, call_id).await
         }
+        "update_plan" => handle_update_plan(sess, arguments, sub_id, call_id).await,
         _ => {
             match sess.mcp_connection_manager.parse_tool_name(&name) {
                 Some((server, tool_name)) => {


@@ -143,6 +143,9 @@ pub struct Config {

     /// Experimental rollout resume path (absolute path to .jsonl; undocumented).
     pub experimental_resume: Option<PathBuf>,
+
+    /// Include an experimental plan tool that the model can use to update its current plan and status of each step.
+    pub include_plan_tool: bool,
 }

 impl Config {
@@ -366,6 +369,7 @@ pub struct ConfigOverrides {
     pub config_profile: Option<String>,
     pub codex_linux_sandbox_exe: Option<PathBuf>,
     pub base_instructions: Option<String>,
+    pub include_plan_tool: Option<bool>,
 }

 impl Config {
@@ -388,6 +392,7 @@ impl Config {
             config_profile: config_profile_key,
             codex_linux_sandbox_exe,
             base_instructions,
+            include_plan_tool,
         } = overrides;

         let config_profile = match config_profile_key.as_ref().or(cfg.profile.as_ref()) {
@@ -521,8 +526,8 @@ impl Config {
                 .chatgpt_base_url
                 .or(cfg.chatgpt_base_url)
                 .unwrap_or("https://chatgpt.com/backend-api/".to_string()),
             experimental_resume,
-
+            include_plan_tool: include_plan_tool.unwrap_or(false),
         };
         Ok(config)
     }
@@ -829,7 +834,7 @@ disable_response_storage = true
     ///
     /// 1. custom command-line argument, e.g. `--model o3`
     /// 2. as part of a profile, where the `--profile` is specified via a CLI
-    ///    (or in the config file itelf)
+    ///    (or in the config file itself)
     /// 3. as an entry in `config.toml`, e.g. `model = "o3"`
     /// 4. the default value for a required field defined in code, e.g.,
     ///    `crate::flags::OPENAI_DEFAULT_MODEL`
@@ -879,6 +884,7 @@ disable_response_storage = true
                 chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
                 experimental_resume: None,
                 base_instructions: None,
+                include_plan_tool: false,
             },
             o3_profile_config
         );
@@ -927,6 +933,7 @@ disable_response_storage = true
             chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
             experimental_resume: None,
             base_instructions: None,
+            include_plan_tool: false,
         };

         assert_eq!(expected_gpt3_profile_config, gpt3_profile_config);
@@ -990,6 +997,7 @@ disable_response_storage = true
             chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
             experimental_resume: None,
             base_instructions: None,
+            include_plan_tool: false,
         };

         assert_eq!(expected_zdr_profile_config, zdr_profile_config);


@@ -34,6 +34,7 @@ mod models;
 pub mod openai_api_key;
 mod openai_model_info;
 mod openai_tools;
+pub mod plan_tool;
 mod project_doc;
 pub mod protocol;
 mod rollout;


@@ -4,13 +4,14 @@ use std::collections::BTreeMap;
 use std::sync::LazyLock;

 use crate::client_common::Prompt;
+use crate::plan_tool::PLAN_TOOL;

 #[derive(Debug, Clone, Serialize)]
 pub(crate) struct ResponsesApiTool {
-    name: &'static str,
-    description: &'static str,
-    strict: bool,
-    parameters: JsonSchema,
+    pub(crate) name: &'static str,
+    pub(crate) description: &'static str,
+    pub(crate) strict: bool,
+    pub(crate) parameters: JsonSchema,
 }

 /// When serialized as JSON, this produces a valid "Tool" in the OpenAI
@@ -74,6 +75,7 @@ static DEFAULT_CODEX_MODEL_TOOLS: LazyLock<Vec<OpenAiTool>> =
 pub(crate) fn create_tools_json_for_responses_api(
     prompt: &Prompt,
     model: &str,
+    include_plan_tool: bool,
 ) -> crate::error::Result<Vec<serde_json::Value>> {
     // Assemble tool list: built-in tools + any extra tools from the prompt.
     let default_tools = if model.starts_with("codex") {
@@ -93,6 +95,10 @@ pub(crate) fn create_tools_json_for_responses_api(
             .map(|(name, tool)| mcp_tool_to_openai_tool(name, tool)),
     );

+    if include_plan_tool {
+        tools_json.push(serde_json::to_value(PLAN_TOOL.clone())?);
+    }
+
     Ok(tools_json)
 }

@@ -102,10 +108,12 @@ pub(crate) fn create_tools_json_for_responses_api(
 pub(crate) fn create_tools_json_for_chat_completions_api(
     prompt: &Prompt,
     model: &str,
+    include_plan_tool: bool,
 ) -> crate::error::Result<Vec<serde_json::Value>> {
     // We start with the JSON for the Responses API and than rewrite it to match
     // the chat completions tool call format.
-    let responses_api_tools_json = create_tools_json_for_responses_api(prompt, model)?;
+    let responses_api_tools_json =
+        create_tools_json_for_responses_api(prompt, model, include_plan_tool)?;
     let tools_json = responses_api_tools_json
         .into_iter()
         .filter_map(|mut tool| {
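
For reference, the plan tool in Responses-API form serializes roughly as sketched below (inferred from ResponsesApiTool's fields and the JsonSchema values in this file; the long description string is elided). The chat-completions path then rewrites each entry into the nested {"type": "function", "function": {...}} shape that API expects.

use serde_json::json;

// Approximate Responses-API serialization of PLAN_TOOL (description elided).
let plan_tool_json = json!({
    "type": "function",
    "name": "update_plan",
    "strict": false,
    "parameters": {
        "type": "object",
        "properties": {
            "explanation": { "type": "string" },
            "plan": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "step": { "type": "string" },
                        "status": { "type": "string" }
                    },
                    "required": ["step", "status"],
                    "additionalProperties": false
                }
            }
        },
        "required": ["plan"],
        "additionalProperties": false
    }
});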


@@ -0,0 +1,126 @@
+use std::collections::BTreeMap;
+use std::sync::LazyLock;
+
+use serde::Deserialize;
+use serde::Serialize;
+
+use crate::codex::Session;
+use crate::models::FunctionCallOutputPayload;
+use crate::models::ResponseInputItem;
+use crate::openai_tools::JsonSchema;
+use crate::openai_tools::OpenAiTool;
+use crate::openai_tools::ResponsesApiTool;
+use crate::protocol::Event;
+use crate::protocol::EventMsg;
+
+// Types for the TODO tool arguments matching codex-vscode/todo-mcp/src/main.rs
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum StepStatus {
+    Pending,
+    InProgress,
+    Completed,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(deny_unknown_fields)]
+pub struct PlanItemArg {
+    pub step: String,
+    pub status: StepStatus,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(deny_unknown_fields)]
+pub struct UpdatePlanArgs {
+    #[serde(default)]
+    pub explanation: Option<String>,
+    pub plan: Vec<PlanItemArg>,
+}
+
+pub(crate) static PLAN_TOOL: LazyLock<OpenAiTool> = LazyLock::new(|| {
+    let mut plan_item_props = BTreeMap::new();
+    plan_item_props.insert("step".to_string(), JsonSchema::String);
+    plan_item_props.insert("status".to_string(), JsonSchema::String);
+
+    let plan_items_schema = JsonSchema::Array {
+        items: Box::new(JsonSchema::Object {
+            properties: plan_item_props,
+            required: &["step", "status"],
+            additional_properties: false,
+        }),
+    };
+
+    let mut properties = BTreeMap::new();
+    properties.insert("explanation".to_string(), JsonSchema::String);
+    properties.insert("plan".to_string(), plan_items_schema);
+
+    OpenAiTool::Function(ResponsesApiTool {
+        name: "update_plan",
+        description: r#"Use the update_plan tool to keep the user updated on the current plan for the task.
+After understanding the user's task, call the update_plan tool with an initial plan. An example of a plan:
+1. Explore the codebase to find relevant files (status: in_progress)
+2. Implement the feature in the XYZ component (status: pending)
+3. Commit changes and make a pull request (status: pending)
+Each step should be a short, 1-sentence description.
+Until all the steps are finished, there should always be exactly one in_progress step in the plan.
+Call the update_plan tool whenever you finish a step, marking the completed step as `completed` and marking the next step as `in_progress`.
+Before running a command, consider whether or not you have completed the previous step, and make sure to mark it as completed before moving on to the next step.
+Sometimes, you may need to change plans in the middle of a task: call `update_plan` with the updated plan and make sure to provide an `explanation` of the rationale when doing so.
+When all steps are completed, call update_plan one last time with all steps marked as `completed`."#,
+        strict: false,
+        parameters: JsonSchema::Object {
+            properties,
+            required: &["plan"],
+            additional_properties: false,
+        },
+    })
+});
+
+/// This function doesn't do anything useful. However, it gives the model a structured way to record its plan that clients can read and render.
+/// So it's the _inputs_ to this function that are useful to clients, not the outputs and neither are actually useful for the model other
+/// than forcing it to come up and document a plan (TBD how that affects performance).
+pub(crate) async fn handle_update_plan(
+    session: &Session,
+    arguments: String,
+    sub_id: String,
+    call_id: String,
+) -> ResponseInputItem {
+    match parse_update_plan_arguments(arguments, &call_id) {
+        Ok(args) => {
+            let output = ResponseInputItem::FunctionCallOutput {
+                call_id,
+                output: FunctionCallOutputPayload {
+                    content: "Plan updated".to_string(),
+                    success: Some(true),
+                },
+            };
+            session
+                .send_event(Event {
+                    id: sub_id.to_string(),
+                    msg: EventMsg::PlanUpdate(args),
+                })
+                .await;
+            output
+        }
+        Err(output) => *output,
+    }
+}
+
+fn parse_update_plan_arguments(
+    arguments: String,
+    call_id: &str,
+) -> Result<UpdatePlanArgs, Box<ResponseInputItem>> {
+    match serde_json::from_str::<UpdatePlanArgs>(&arguments) {
+        Ok(args) => Ok(args),
+        Err(e) => {
+            let output = ResponseInputItem::FunctionCallOutput {
+                call_id: call_id.to_string(),
+                output: FunctionCallOutputPayload {
+                    content: format!("failed to parse function arguments: {e}"),
+                    success: None,
+                },
+            };
+            Err(Box::new(output))
+        }
+    }
+}
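
A quick sanity check of the parsing path above (a minimal sketch, e.g. a unit test in this file; it relies only on the serde derives shown here):

// Valid payload: statuses are snake_case, and `explanation` may be omitted
// thanks to #[serde(default)].
let args: UpdatePlanArgs = serde_json::from_str(
    r#"{"plan":[{"step":"Explore the codebase","status":"in_progress"}]}"#,
)
.expect("well-formed arguments parse");
assert!(matches!(args.plan[0].status, StepStatus::InProgress));

// Unknown keys are rejected by #[serde(deny_unknown_fields)]; this is the
// case that produces the "failed to parse function arguments" output above.
assert!(serde_json::from_str::<UpdatePlanArgs>(r#"{"plan":[],"extra":true}"#).is_err());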


@@ -19,6 +19,7 @@ use crate::config_types::ReasoningEffort as ReasoningEffortConfig;
 use crate::config_types::ReasoningSummary as ReasoningSummaryConfig;
 use crate::message_history::HistoryEntry;
 use crate::model_provider_info::ModelProviderInfo;
+use crate::plan_tool::UpdatePlanArgs;

 /// Submission Queue Entry - requests from user
 #[derive(Debug, Clone, Deserialize, Serialize)]
@@ -335,6 +336,8 @@ pub enum EventMsg {
     /// Response to GetHistoryEntryRequest.
     GetHistoryEntryResponse(GetHistoryEntryResponseEvent),

+    PlanUpdate(UpdatePlanArgs),
+
     /// Notification that the agent is shutting down.
     ShutdownComplete,
 }
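
Clients consume the new variant by matching on EventMsg; a minimal rendering sketch (the render_plan helper is hypothetical, not part of this commit):

use codex_core::plan_tool::{StepStatus, UpdatePlanArgs};

// Hypothetical client-side rendering of a plan update event.
fn render_plan(args: &UpdatePlanArgs) -> String {
    args.plan
        .iter()
        .map(|item| {
            let marker = match item.status {
                StepStatus::Pending => "[ ]",
                StepStatus::InProgress => "[~]",
                StepStatus::Completed => "[x]",
            };
            format!("{marker} {}", item.step)
        })
        .collect::<Vec<_>>()
        .join("\n")
}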


@@ -1,5 +1,6 @@
 use codex_common::elapsed::format_elapsed;
 use codex_core::config::Config;
+use codex_core::plan_tool::UpdatePlanArgs;
 use codex_core::protocol::AgentMessageDeltaEvent;
 use codex_core::protocol::AgentMessageEvent;
 use codex_core::protocol::AgentReasoningDeltaEvent;
@@ -513,6 +514,11 @@ impl EventProcessor for EventProcessorWithHumanOutput {
                 ts_println!(self, "model: {}", model);
                 println!();
             }
+            EventMsg::PlanUpdate(plan_update_event) => {
+                let UpdatePlanArgs { explanation, plan } = plan_update_event;
+                ts_println!(self, "explanation: {explanation:?}");
+                ts_println!(self, "plan: {plan:?}");
+            }
             EventMsg::GetHistoryEntryResponse(_) => {
                 // Currently ignored in exec output.
             }


@@ -126,6 +126,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
         model_provider: None,
         codex_linux_sandbox_exe,
         base_instructions: None,
+        include_plan_tool: None,
     };
     // Parse `-c` overrides.
     let cli_kv_overrides = match config_overrides.parse_overrides() {


@@ -50,6 +50,10 @@ pub struct CodexToolCallParam {
     /// The set of instructions to use instead of the default ones.
     #[serde(default, skip_serializing_if = "Option::is_none")]
     pub base_instructions: Option<String>,
+
+    /// Whether to include the plan tool in the conversation.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub include_plan_tool: Option<bool>,
 }

 /// Custom enum mirroring [`AskForApproval`], but has an extra dependency on
@@ -140,9 +144,10 @@ impl CodexToolCallParam {
             sandbox,
             config: cli_overrides,
             base_instructions,
+            include_plan_tool,
         } = self;

-        // Build the `ConfigOverrides` recognised by codex-core.
+        // Build the `ConfigOverrides` recognized by codex-core.
         let overrides = codex_core::config::ConfigOverrides {
             model,
             config_profile: profile,
@@ -152,6 +157,7 @@ impl CodexToolCallParam {
             model_provider: None,
             codex_linux_sandbox_exe,
             base_instructions,
+            include_plan_tool,
         };

         let cli_overrides = cli_overrides
@@ -262,6 +268,10 @@ mod tests {
         "description": "Working directory for the session. If relative, it is resolved against the server process's current working directory.",
         "type": "string"
       },
+      "include-plan-tool": {
+        "description": "Whether to include the plan tool in the conversation.",
+        "type": "boolean"
+      },
       "model": {
         "description": "Optional override for the model name (e.g. \"o3\", \"o4-mini\").",
         "type": "string"


@@ -263,6 +263,7 @@ async fn run_codex_tool_session_inner(
             | EventMsg::PatchApplyBegin(_)
             | EventMsg::PatchApplyEnd(_)
             | EventMsg::GetHistoryEntryResponse(_)
+            | EventMsg::PlanUpdate(_)
             | EventMsg::ShutdownComplete => {
                 // For now, we do not do anything extra for these
                 // events. Note that


@@ -81,6 +81,7 @@ async fn shell_command_interruption() -> anyhow::Result<()> {
         sandbox: None,
         config: None,
         base_instructions: None,
+        include_plan_tool: None,
     })
     .await?;



@@ -79,6 +79,7 @@ pub async fn run_main(
         config_profile: cli.config_profile.clone(),
         codex_linux_sandbox_exe,
         base_instructions: None,
+        include_plan_tool: None,
     };
     // Parse `-c` overrides from the CLI.
     let cli_kv_overrides = match cli.config_overrides.parse_overrides() {