From 8828f6f082c7e1f4144fa014f18d480851c7f34e Mon Sep 17 00:00:00 2001
From: Gabriel Peal <gpeal@users.noreply.github.com>
Date: Tue, 29 Jul 2025 11:22:02 -0700
Subject: [PATCH] Add an experimental plan tool (#1726)

This adds a tool the model can call to update a plan. The tool doesn't
actually _do_ anything, but it gives clients a chance to read and render
the structured plan. The tool is opt-in via `include_plan_tool` and is
off by default. We will likely iterate on the prompt and tools exposed
for planning over time.
---
 codex-rs/core/src/chat_completions.rs         |   3 +-
 codex-rs/core/src/client.rs                   |   7 +-
 codex-rs/core/src/codex.rs                    |   2 +
 codex-rs/core/src/config.rs                   |  12 +-
 codex-rs/core/src/lib.rs                      |   1 +
 codex-rs/core/src/openai_tools.rs             |  18 ++-
 codex-rs/core/src/plan_tool.rs                | 126 ++++++++++++++++++
 codex-rs/core/src/protocol.rs                 |   3 +
 .../src/event_processor_with_human_output.rs  |   6 +
 codex-rs/exec/src/lib.rs                      |   1 +
 codex-rs/mcp-server/src/codex_tool_config.rs  |  12 +-
 codex-rs/mcp-server/src/codex_tool_runner.rs  |   1 +
 codex-rs/mcp-server/tests/interrupt.rs        |   1 +
 codex-rs/tui/src/lib.rs                       |   1 +
 14 files changed, 184 insertions(+), 10 deletions(-)
 create mode 100644 codex-rs/core/src/plan_tool.rs
diff --git a/codex-rs/core/src/chat_completions.rs b/codex-rs/core/src/chat_completions.rs
index 5adf3c4d..3042ec45 100644
--- a/codex-rs/core/src/chat_completions.rs
+++ b/codex-rs/core/src/chat_completions.rs
@@ -30,6 +30,7 @@ use crate::util::backoff;
 pub(crate) async fn stream_chat_completions(
     prompt: &Prompt,
     model: &str,
+    include_plan_tool: bool,
     client: &reqwest::Client,
     provider: &ModelProviderInfo,
 ) -> Result<ResponseStream> {
@@ -105,7 +106,7 @@ pub(crate) async fn stream_chat_completions(
         }
     }
 
-    let tools_json = create_tools_json_for_chat_completions_api(prompt, model)?;
+    let tools_json = create_tools_json_for_chat_completions_api(prompt, model, include_plan_tool)?;
     let payload = json!({
         "model": model,
         "messages": messages,
diff --git a/codex-rs/core/src/client.rs b/codex-rs/core/src/client.rs
index 1648da6d..aa31b67e 100644
--- a/codex-rs/core/src/client.rs
+++ b/codex-rs/core/src/client.rs
@@ -77,6 +77,7 @@ impl ModelClient {
                 let response_stream = stream_chat_completions(
                     prompt,
                     &self.config.model,
+                    self.config.include_plan_tool,
                     &self.client,
                     &self.provider,
                 )
@@ -115,7 +116,11 @@ impl ModelClient {
         }
 
         let full_instructions = prompt.get_full_instructions(&self.config.model);
-        let tools_json = create_tools_json_for_responses_api(prompt, &self.config.model)?;
+        let tools_json = create_tools_json_for_responses_api(
+            prompt,
+            &self.config.model,
+            self.config.include_plan_tool,
+        )?;
         let reasoning = create_reasoning_param_for_request(&self.config, self.effort, self.summary);
 
         // Request encrypted COT if we are not storing responses,
diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs
index 3ab3e8d7..6efc878f 100644
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -55,6 +55,7 @@ use crate::models::ReasoningItemReasoningSummary;
 use crate::models::ResponseInputItem;
 use crate::models::ResponseItem;
 use crate::models::ShellToolCallParams;
+use crate::plan_tool::handle_update_plan;
 use crate::project_doc::get_user_instructions;
 use crate::protocol::AgentMessageDeltaEvent;
 use crate::protocol::AgentMessageEvent;
@@ -1336,6 +1337,7 @@ async fn handle_function_call(
             };
             handle_container_exec_with_params(params, sess, sub_id, call_id).await
         }
+        "update_plan" => handle_update_plan(sess, arguments, sub_id, call_id).await,
         _ => {
             match sess.mcp_connection_manager.parse_tool_name(&name) {
                 Some((server, tool_name)) => {
diff --git a/codex-rs/core/src/config.rs b/codex-rs/core/src/config.rs
index 57027bd0..53ca8d5b 100644
--- a/codex-rs/core/src/config.rs
+++ b/codex-rs/core/src/config.rs
@@ -143,6 +143,9 @@ pub struct Config {
 
     /// Experimental rollout resume path (absolute path to .jsonl; undocumented).
     pub experimental_resume: Option<PathBuf>,
+
+    /// Include an experimental plan tool that the model can use to update its current plan and status of each step.
+    pub include_plan_tool: bool,
 }
 
 impl Config {
@@ -366,6 +369,7 @@ pub struct ConfigOverrides {
     pub config_profile: Option<String>,
     pub codex_linux_sandbox_exe: Option<PathBuf>,
     pub base_instructions: Option<String>,
+    pub include_plan_tool: Option<bool>,
 }
 
 impl Config {
@@ -388,6 +392,7 @@ impl Config {
             config_profile: config_profile_key,
             codex_linux_sandbox_exe,
             base_instructions,
+            include_plan_tool,
         } = overrides;
 
         let config_profile = match config_profile_key.as_ref().or(cfg.profile.as_ref()) {
@@ -521,8 +526,8 @@ impl Config {
                 .chatgpt_base_url
                 .or(cfg.chatgpt_base_url)
                 .unwrap_or("https://chatgpt.com/backend-api/".to_string()),
-
             experimental_resume,
+            include_plan_tool: include_plan_tool.unwrap_or(false),
         };
         Ok(config)
     }
@@ -829,7 +834,7 @@ disable_response_storage = true
     ///
     /// 1. custom command-line argument, e.g. `--model o3`
     /// 2. as part of a profile, where the `--profile` is specified via a CLI
-    ///    (or in the config file itelf)
+    ///    (or in the config file itself)
     /// 3. as an entry in `config.toml`, e.g. `model = "o3"`
     /// 4. the default value for a required field defined in code, e.g.,
     ///    `crate::flags::OPENAI_DEFAULT_MODEL`
@@ -879,6 +884,7 @@ disable_response_storage = true
                 chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
                 experimental_resume: None,
                 base_instructions: None,
+                include_plan_tool: false,
             },
             o3_profile_config
         );
@@ -927,6 +933,7 @@ disable_response_storage = true
             chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
             experimental_resume: None,
             base_instructions: None,
+            include_plan_tool: false,
         };
 
         assert_eq!(expected_gpt3_profile_config, gpt3_profile_config);
@@ -990,6 +997,7 @@ disable_response_storage = true
             chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
             experimental_resume: None,
             base_instructions: None,
+            include_plan_tool: false,
         };
 
         assert_eq!(expected_zdr_profile_config, zdr_profile_config);
diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs
index 6cb6aaa6..b2dbded5 100644
--- a/codex-rs/core/src/lib.rs
+++ b/codex-rs/core/src/lib.rs
@@ -34,6 +34,7 @@ mod models;
 pub mod openai_api_key;
 mod openai_model_info;
 mod openai_tools;
+pub mod plan_tool;
 mod project_doc;
 pub mod protocol;
 mod rollout;
diff --git a/codex-rs/core/src/openai_tools.rs b/codex-rs/core/src/openai_tools.rs
index ef12a629..0f1e7d9c 100644
--- a/codex-rs/core/src/openai_tools.rs
+++ b/codex-rs/core/src/openai_tools.rs
@@ -4,13 +4,14 @@ use std::collections::BTreeMap;
 use std::sync::LazyLock;
 
 use crate::client_common::Prompt;
+use crate::plan_tool::PLAN_TOOL;
 
 #[derive(Debug, Clone, Serialize)]
 pub(crate) struct ResponsesApiTool {
-    name: &'static str,
-    description: &'static str,
-    strict: bool,
-    parameters: JsonSchema,
+    pub(crate) name: &'static str,
+    pub(crate) description: &'static str,
+    pub(crate) strict: bool,
+    pub(crate) parameters: JsonSchema,
 }
 
 /// When serialized as JSON, this produces a valid "Tool" in the OpenAI
@@ -74,6 +75,7 @@ static DEFAULT_CODEX_MODEL_TOOLS: LazyLock<Vec<OpenAiTool>> =
 pub(crate) fn create_tools_json_for_responses_api(
     prompt: &Prompt,
     model: &str,
+    include_plan_tool: bool,
 ) -> crate::error::Result<Vec<serde_json::Value>> {
     // Assemble tool list: built-in tools + any extra tools from the prompt.
     let default_tools = if model.starts_with("codex") {
@@ -93,6 +95,10 @@ pub(crate) fn create_tools_json_for_responses_api(
             .map(|(name, tool)| mcp_tool_to_openai_tool(name, tool)),
     );
 
+    if include_plan_tool {
+        tools_json.push(serde_json::to_value(PLAN_TOOL.clone())?);
+    }
+
     Ok(tools_json)
 }
 
@@ -102,10 +108,12 @@ pub(crate) fn create_tools_json_for_responses_api(
 pub(crate) fn create_tools_json_for_chat_completions_api(
     prompt: &Prompt,
     model: &str,
+    include_plan_tool: bool,
 ) -> crate::error::Result<Vec<serde_json::Value>> {
     // We start with the JSON for the Responses API and than rewrite it to match
     // the chat completions tool call format.
-    let responses_api_tools_json = create_tools_json_for_responses_api(prompt, model)?;
+    let responses_api_tools_json =
+        create_tools_json_for_responses_api(prompt, model, include_plan_tool)?;
     let tools_json = responses_api_tools_json
         .into_iter()
         .filter_map(|mut tool| {
diff --git a/codex-rs/core/src/plan_tool.rs b/codex-rs/core/src/plan_tool.rs
new file mode 100644
index 00000000..dbddb8b5
--- /dev/null
+++ b/codex-rs/core/src/plan_tool.rs
@@ -0,0 +1,126 @@
+use std::collections::BTreeMap;
+use std::sync::LazyLock;
+
+use serde::Deserialize;
+use serde::Serialize;
+
+use crate::codex::Session;
+use crate::models::FunctionCallOutputPayload;
+use crate::models::ResponseInputItem;
+use crate::openai_tools::JsonSchema;
+use crate::openai_tools::OpenAiTool;
+use crate::openai_tools::ResponsesApiTool;
+use crate::protocol::Event;
+use crate::protocol::EventMsg;
+
+// Types for the plan tool (`update_plan`) arguments, matching codex-vscode/todo-mcp/src/main.rs
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum StepStatus {
+    Pending,
+    InProgress,
+    Completed,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(deny_unknown_fields)]
+pub struct PlanItemArg {
+    pub step: String,
+    pub status: StepStatus,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(deny_unknown_fields)]
+pub struct UpdatePlanArgs {
+    #[serde(default)]
+    pub explanation: Option<String>,
+    pub plan: Vec<PlanItemArg>,
+}
+
+pub(crate) static PLAN_TOOL: LazyLock<OpenAiTool> = LazyLock::new(|| {
+    let mut plan_item_props = BTreeMap::new();
+    plan_item_props.insert("step".to_string(), JsonSchema::String);
+    plan_item_props.insert("status".to_string(), JsonSchema::String);
+
+    let plan_items_schema = JsonSchema::Array {
+        items: Box::new(JsonSchema::Object {
+            properties: plan_item_props,
+            required: &["step", "status"],
+            additional_properties: false,
+        }),
+    };
+
+    let mut properties = BTreeMap::new();
+    properties.insert("explanation".to_string(), JsonSchema::String);
+    properties.insert("plan".to_string(), plan_items_schema);
+
+    OpenAiTool::Function(ResponsesApiTool {
+        name: "update_plan",
+        description: r#"Use the update_plan tool to keep the user updated on the current plan for the task.
+After understanding the user's task, call the update_plan tool with an initial plan. An example of a plan:
+1. Explore the codebase to find relevant files (status: in_progress)
+2. Implement the feature in the XYZ component (status: pending)
+3. Commit changes and make a pull request (status: pending)
+Each step should be a short, 1-sentence description.
+Until all the steps are finished, there should always be exactly one in_progress step in the plan.
+Call the update_plan tool whenever you finish a step, marking the completed step as `completed` and marking the next step as `in_progress`.
+Before running a command, consider whether or not you have completed the previous step, and make sure to mark it as completed before moving on to the next step.
+Sometimes, you may need to change plans in the middle of a task: call `update_plan` with the updated plan and make sure to provide an `explanation` of the rationale when doing so.
+When all steps are completed, call update_plan one last time with all steps marked as `completed`."#,
+        strict: false,
+        parameters: JsonSchema::Object {
+            properties,
+            required: &["plan"],
+            additional_properties: false,
+        },
+    })
+});
+
+/// This function doesn't do anything useful. However, it gives the model a structured way to record its plan that clients can read and render.
+/// So it's the _inputs_ to this function that are useful to clients, not the outputs, and neither is actually useful to the model other
+/// than forcing it to come up with and document a plan (TBD how that affects performance).
+pub(crate) async fn handle_update_plan(
+    session: &Session,
+    arguments: String,
+    sub_id: String,
+    call_id: String,
+) -> ResponseInputItem {
+    match parse_update_plan_arguments(arguments, &call_id) {
+        Ok(args) => {
+            let output = ResponseInputItem::FunctionCallOutput {
+                call_id,
+                output: FunctionCallOutputPayload {
+                    content: "Plan updated".to_string(),
+                    success: Some(true),
+                },
+            };
+            session
+                .send_event(Event {
+                    id: sub_id.to_string(),
+                    msg: EventMsg::PlanUpdate(args),
+                })
+                .await;
+            output
+        }
+        Err(output) => *output,
+    }
+}
+
+fn parse_update_plan_arguments(
+    arguments: String,
+    call_id: &str,
+) -> Result<UpdatePlanArgs, Box<ResponseInputItem>> {
+    match serde_json::from_str::<UpdatePlanArgs>(&arguments) {
+        Ok(args) => Ok(args),
+        Err(e) => {
+            let output = ResponseInputItem::FunctionCallOutput {
+                call_id: call_id.to_string(),
+                output: FunctionCallOutputPayload {
+                    content: format!("failed to parse function arguments: {e}"),
+                    success: None,
+                },
+            };
+            Err(Box::new(output))
+        }
+    }
+}
diff --git a/codex-rs/core/src/protocol.rs b/codex-rs/core/src/protocol.rs
index 22bc1809..041a8c58 100644
--- a/codex-rs/core/src/protocol.rs
+++ b/codex-rs/core/src/protocol.rs
@@ -19,6 +19,7 @@ use crate::config_types::ReasoningEffort as ReasoningEffortConfig;
 use crate::config_types::ReasoningSummary as ReasoningSummaryConfig;
 use crate::message_history::HistoryEntry;
 use crate::model_provider_info::ModelProviderInfo;
+use crate::plan_tool::UpdatePlanArgs;
 
 /// Submission Queue Entry - requests from user
 #[derive(Debug, Clone, Deserialize, Serialize)]
@@ -335,6 +336,8 @@ pub enum EventMsg {
     /// Response to GetHistoryEntryRequest.
     GetHistoryEntryResponse(GetHistoryEntryResponseEvent),
 
+    PlanUpdate(UpdatePlanArgs),
+
     /// Notification that the agent is shutting down.
     ShutdownComplete,
 }
diff --git a/codex-rs/exec/src/event_processor_with_human_output.rs b/codex-rs/exec/src/event_processor_with_human_output.rs
index bc647c68..6c3f73f0 100644
--- a/codex-rs/exec/src/event_processor_with_human_output.rs
+++ b/codex-rs/exec/src/event_processor_with_human_output.rs
@@ -1,5 +1,6 @@
 use codex_common::elapsed::format_elapsed;
 use codex_core::config::Config;
+use codex_core::plan_tool::UpdatePlanArgs;
 use codex_core::protocol::AgentMessageDeltaEvent;
 use codex_core::protocol::AgentMessageEvent;
 use codex_core::protocol::AgentReasoningDeltaEvent;
@@ -513,6 +514,11 @@ impl EventProcessor for EventProcessorWithHumanOutput {
                 ts_println!(self, "model: {}", model);
                 println!();
             }
+            EventMsg::PlanUpdate(plan_update_event) => {
+                let UpdatePlanArgs { explanation, plan } = plan_update_event;
+                ts_println!(self, "explanation: {explanation:?}");
+                ts_println!(self, "plan: {plan:?}");
+            }
             EventMsg::GetHistoryEntryResponse(_) => {
                 // Currently ignored in exec output.
             }
diff --git a/codex-rs/exec/src/lib.rs b/codex-rs/exec/src/lib.rs
index cf2f2bd6..ce4d7f65 100644
--- a/codex-rs/exec/src/lib.rs
+++ b/codex-rs/exec/src/lib.rs
@@ -126,6 +126,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
         model_provider: None,
         codex_linux_sandbox_exe,
         base_instructions: None,
+        include_plan_tool: None,
     };
     // Parse `-c` overrides.
     let cli_kv_overrides = match config_overrides.parse_overrides() {
diff --git a/codex-rs/mcp-server/src/codex_tool_config.rs b/codex-rs/mcp-server/src/codex_tool_config.rs
index 9f6f7a78..877d0e05 100644
--- a/codex-rs/mcp-server/src/codex_tool_config.rs
+++ b/codex-rs/mcp-server/src/codex_tool_config.rs
@@ -50,6 +50,10 @@ pub struct CodexToolCallParam {
     /// The set of instructions to use instead of the default ones.
     #[serde(default, skip_serializing_if = "Option::is_none")]
     pub base_instructions: Option<String>,
+
+    /// Whether to include the plan tool in the conversation.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub include_plan_tool: Option<bool>,
 }
 
 /// Custom enum mirroring [`AskForApproval`], but has an extra dependency on
@@ -140,9 +144,10 @@ impl CodexToolCallParam {
             sandbox,
             config: cli_overrides,
             base_instructions,
+            include_plan_tool,
         } = self;
 
-        // Build the `ConfigOverrides` recognised by codex-core.
+        // Build the `ConfigOverrides` recognized by codex-core.
         let overrides = codex_core::config::ConfigOverrides {
             model,
             config_profile: profile,
@@ -152,6 +157,7 @@ impl CodexToolCallParam {
             model_provider: None,
             codex_linux_sandbox_exe,
             base_instructions,
+            include_plan_tool,
         };
 
         let cli_overrides = cli_overrides
@@ -262,6 +268,10 @@ mod tests {
                 "description": "Working directory for the session. If relative, it is resolved against the server process's current working directory.",
                 "type": "string"
               },
+              "include-plan-tool": {
+                "description": "Whether to include the plan tool in the conversation.",
+                "type": "boolean"
+              },
               "model": {
                 "description": "Optional override for the model name (e.g. \"o3\", \"o4-mini\").",
                 "type": "string"
diff --git a/codex-rs/mcp-server/src/codex_tool_runner.rs b/codex-rs/mcp-server/src/codex_tool_runner.rs
index c3cb39c4..f25659b2 100644
--- a/codex-rs/mcp-server/src/codex_tool_runner.rs
+++ b/codex-rs/mcp-server/src/codex_tool_runner.rs
@@ -263,6 +263,7 @@ async fn run_codex_tool_session_inner(
                     | EventMsg::PatchApplyBegin(_)
                     | EventMsg::PatchApplyEnd(_)
                     | EventMsg::GetHistoryEntryResponse(_)
+                    | EventMsg::PlanUpdate(_)
                     | EventMsg::ShutdownComplete => {
                         // For now, we do not do anything extra for these
                         // events. Note that
diff --git a/codex-rs/mcp-server/tests/interrupt.rs b/codex-rs/mcp-server/tests/interrupt.rs
index cd163ea0..313bc7af 100644
--- a/codex-rs/mcp-server/tests/interrupt.rs
+++ b/codex-rs/mcp-server/tests/interrupt.rs
@@ -81,6 +81,7 @@ async fn shell_command_interruption() -> anyhow::Result<()> {
             sandbox: None,
             config: None,
             base_instructions: None,
+            include_plan_tool: None,
         })
         .await?;
 
diff --git a/codex-rs/tui/src/lib.rs b/codex-rs/tui/src/lib.rs
index f93c0a2b..6c6c6621 100644
--- a/codex-rs/tui/src/lib.rs
+++ b/codex-rs/tui/src/lib.rs
@@ -79,6 +79,7 @@ pub async fn run_main(
             config_profile: cli.config_profile.clone(),
             codex_linux_sandbox_exe,
             base_instructions: None,
+            include_plan_tool: None,
         };
         // Parse `-c` overrides from the CLI.
         let cli_kv_overrides = match cli.config_overrides.parse_overrides() {