Add an experimental plan tool (#1726)

This adds a tool the model can call to update a plan. The tool doesn't actually _do_ anything, but it gives clients a chance to read and render the structured plan. We will likely iterate on the prompt and tools exposed for planning over time.
2025-07-29 11:22:02 -07:00
parent f8fcaaaf6f
commit 8828f6f082
14 changed files with 184 additions and 10 deletions
--- a/codex-rs/core/src/chat_completions.rs
+++ b/codex-rs/core/src/chat_completions.rs
@@ -30,6 +30,7 @@ use crate::util::backoff;
 pub(crate) async fn stream_chat_completions(
    prompt: &Prompt,
    model: &str,
+    include_plan_tool: bool,
    client: &reqwest::Client,
    provider: &ModelProviderInfo,
 ) -> Result<ResponseStream> {
@@ -105,7 +106,7 @@ pub(crate) async fn stream_chat_completions(
        }
    }

-    let tools_json = create_tools_json_for_chat_completions_api(prompt, model)?;
+    let tools_json = create_tools_json_for_chat_completions_api(prompt, model, include_plan_tool)?;
    let payload = json!({
        "model": model,
        "messages": messages,
--- a/codex-rs/core/src/client.rs
+++ b/codex-rs/core/src/client.rs
@@ -77,6 +77,7 @@ impl ModelClient {
                let response_stream = stream_chat_completions(
                    prompt,
                    &self.config.model,
+                    self.config.include_plan_tool,
                    &self.client,
                    &self.provider,
                )
@@ -115,7 +116,11 @@ impl ModelClient {
        }

        let full_instructions = prompt.get_full_instructions(&self.config.model);
-        let tools_json = create_tools_json_for_responses_api(prompt, &self.config.model)?;
+        let tools_json = create_tools_json_for_responses_api(
+            prompt,
+            &self.config.model,
+            self.config.include_plan_tool,
+        )?;
        let reasoning = create_reasoning_param_for_request(&self.config, self.effort, self.summary);

        // Request encrypted COT if we are not storing responses,
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -55,6 +55,7 @@ use crate::models::ReasoningItemReasoningSummary;
 use crate::models::ResponseInputItem;
 use crate::models::ResponseItem;
 use crate::models::ShellToolCallParams;
+use crate::plan_tool::handle_update_plan;
 use crate::project_doc::get_user_instructions;
 use crate::protocol::AgentMessageDeltaEvent;
 use crate::protocol::AgentMessageEvent;
@@ -1336,6 +1337,7 @@ async fn handle_function_call(
            };
            handle_container_exec_with_params(params, sess, sub_id, call_id).await
        }
+        "update_plan" => handle_update_plan(sess, arguments, sub_id, call_id).await,
        _ => {
            match sess.mcp_connection_manager.parse_tool_name(&name) {
                Some((server, tool_name)) => {
--- a/codex-rs/core/src/config.rs
+++ b/codex-rs/core/src/config.rs
@@ -143,6 +143,9 @@ pub struct Config {

    /// Experimental rollout resume path (absolute path to .jsonl; undocumented).
    pub experimental_resume: Option<PathBuf>,
+
+    /// Include an experimental plan tool that the model can use to update its current plan and status of each step.
+    pub include_plan_tool: bool,
 }

 impl Config {
@@ -366,6 +369,7 @@ pub struct ConfigOverrides {
    pub config_profile: Option<String>,
    pub codex_linux_sandbox_exe: Option<PathBuf>,
    pub base_instructions: Option<String>,
+    pub include_plan_tool: Option<bool>,
 }

 impl Config {
@@ -388,6 +392,7 @@ impl Config {
            config_profile: config_profile_key,
            codex_linux_sandbox_exe,
            base_instructions,
+            include_plan_tool,
        } = overrides;

        let config_profile = match config_profile_key.as_ref().or(cfg.profile.as_ref()) {
@@ -521,8 +526,8 @@ impl Config {
                .chatgpt_base_url
                .or(cfg.chatgpt_base_url)
                .unwrap_or("https://chatgpt.com/backend-api/".to_string()),
-
            experimental_resume,
+            include_plan_tool: include_plan_tool.unwrap_or(false),
        };
        Ok(config)
    }
@@ -829,7 +834,7 @@ disable_response_storage = true
    ///
    /// 1. custom command-line argument, e.g. `--model o3`
    /// 2. as part of a profile, where the `--profile` is specified via a CLI
-    ///    (or in the config file itelf)
+    ///    (or in the config file itself)
    /// 3. as an entry in `config.toml`, e.g. `model = "o3"`
    /// 4. the default value for a required field defined in code, e.g.,
    ///    `crate::flags::OPENAI_DEFAULT_MODEL`
@@ -879,6 +884,7 @@ disable_response_storage = true
                chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
                experimental_resume: None,
                base_instructions: None,
+                include_plan_tool: false,
            },
            o3_profile_config
        );
@@ -927,6 +933,7 @@ disable_response_storage = true
            chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
            experimental_resume: None,
            base_instructions: None,
+            include_plan_tool: false,
        };

        assert_eq!(expected_gpt3_profile_config, gpt3_profile_config);
@@ -990,6 +997,7 @@ disable_response_storage = true
            chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
            experimental_resume: None,
            base_instructions: None,
+            include_plan_tool: false,
        };

        assert_eq!(expected_zdr_profile_config, zdr_profile_config);
--- a/codex-rs/core/src/lib.rs
+++ b/codex-rs/core/src/lib.rs
@@ -34,6 +34,7 @@ mod models;
 pub mod openai_api_key;
 mod openai_model_info;
 mod openai_tools;
+pub mod plan_tool;
 mod project_doc;
 pub mod protocol;
 mod rollout;
--- a/codex-rs/core/src/openai_tools.rs
+++ b/codex-rs/core/src/openai_tools.rs
@@ -4,13 +4,14 @@ use std::collections::BTreeMap;
 use std::sync::LazyLock;

 use crate::client_common::Prompt;
+use crate::plan_tool::PLAN_TOOL;

 #[derive(Debug, Clone, Serialize)]
 pub(crate) struct ResponsesApiTool {
-    name: &'static str,
-    description: &'static str,
-    strict: bool,
-    parameters: JsonSchema,
+    pub(crate) name: &'static str,
+    pub(crate) description: &'static str,
+    pub(crate) strict: bool,
+    pub(crate) parameters: JsonSchema,
 }

 /// When serialized as JSON, this produces a valid "Tool" in the OpenAI
@@ -74,6 +75,7 @@ static DEFAULT_CODEX_MODEL_TOOLS: LazyLock<Vec<OpenAiTool>> =
 pub(crate) fn create_tools_json_for_responses_api(
    prompt: &Prompt,
    model: &str,
+    include_plan_tool: bool,
 ) -> crate::error::Result<Vec<serde_json::Value>> {
    // Assemble tool list: built-in tools + any extra tools from the prompt.
    let default_tools = if model.starts_with("codex") {
@@ -93,6 +95,10 @@ pub(crate) fn create_tools_json_for_responses_api(
            .map(|(name, tool)| mcp_tool_to_openai_tool(name, tool)),
    );

+    if include_plan_tool {
+        tools_json.push(serde_json::to_value(PLAN_TOOL.clone())?);
+    }
+
    Ok(tools_json)
 }

@@ -102,10 +108,12 @@ pub(crate) fn create_tools_json_for_responses_api(
 pub(crate) fn create_tools_json_for_chat_completions_api(
    prompt: &Prompt,
    model: &str,
+    include_plan_tool: bool,
 ) -> crate::error::Result<Vec<serde_json::Value>> {
    // We start with the JSON for the Responses API and than rewrite it to match
    // the chat completions tool call format.
-    let responses_api_tools_json = create_tools_json_for_responses_api(prompt, model)?;
+    let responses_api_tools_json =
+        create_tools_json_for_responses_api(prompt, model, include_plan_tool)?;
    let tools_json = responses_api_tools_json
        .into_iter()
        .filter_map(|mut tool| {
--- a/codex-rs/core/src/plan_tool.rs
+++ b/codex-rs/core/src/plan_tool.rs
@@ -0,0 +1,126 @@
+use std::collections::BTreeMap;
+use std::sync::LazyLock;
+
+use serde::Deserialize;
+use serde::Serialize;
+
+use crate::codex::Session;
+use crate::models::FunctionCallOutputPayload;
+use crate::models::ResponseInputItem;
+use crate::openai_tools::JsonSchema;
+use crate::openai_tools::OpenAiTool;
+use crate::openai_tools::ResponsesApiTool;
+use crate::protocol::Event;
+use crate::protocol::EventMsg;
+
+// Types for the TODO tool arguments matching codex-vscode/todo-mcp/src/main.rs
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum StepStatus {
+    Pending,
+    InProgress,
+    Completed,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(deny_unknown_fields)]
+pub struct PlanItemArg {
+    pub step: String,
+    pub status: StepStatus,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(deny_unknown_fields)]
+pub struct UpdatePlanArgs {
+    #[serde(default)]
+    pub explanation: Option<String>,
+    pub plan: Vec<PlanItemArg>,
+}
+
+pub(crate) static PLAN_TOOL: LazyLock<OpenAiTool> = LazyLock::new(|| {
+    let mut plan_item_props = BTreeMap::new();
+    plan_item_props.insert("step".to_string(), JsonSchema::String);
+    plan_item_props.insert("status".to_string(), JsonSchema::String);
+
+    let plan_items_schema = JsonSchema::Array {
+        items: Box::new(JsonSchema::Object {
+            properties: plan_item_props,
+            required: &["step", "status"],
+            additional_properties: false,
+        }),
+    };
+
+    let mut properties = BTreeMap::new();
+    properties.insert("explanation".to_string(), JsonSchema::String);
+    properties.insert("plan".to_string(), plan_items_schema);
+
+    OpenAiTool::Function(ResponsesApiTool {
+        name: "update_plan",
+        description: r#"Use the update_plan tool to keep the user updated on the current plan for the task.
+After understanding the user's task, call the update_plan tool with an initial plan. An example of a plan:
+1. Explore the codebase to find relevant files (status: in_progress)
+2. Implement the feature in the XYZ component (status: pending)
+3. Commit changes and make a pull request (status: pending)
+Each step should be a short, 1-sentence description.
+Until all the steps are finished, there should always be exactly one in_progress step in the plan.
+Call the update_plan tool whenever you finish a step, marking the completed step as `completed` and marking the next step as `in_progress`.
+Before running a command, consider whether or not you have completed the previous step, and make sure to mark it as completed before moving on to the next step.
+Sometimes, you may need to change plans in the middle of a task: call `update_plan` with the updated plan and make sure to provide an `explanation` of the rationale when doing so.
+When all steps are completed, call update_plan one last time with all steps marked as `completed`."#,
+        strict: false,
+        parameters: JsonSchema::Object {
+            properties,
+            required: &["plan"],
+            additional_properties: false,
+        },
+    })
+});
+
+/// This function doesn't do anything useful. However, it gives the model a structured way to record its plan that clients can read and render.
+/// So it's the _inputs_ to this function that are useful to clients, not the outputs, and neither is actually useful to the model other
+/// than forcing it to come up with and document a plan (TBD how that affects performance).
+pub(crate) async fn handle_update_plan(
+    session: &Session,
+    arguments: String,
+    sub_id: String,
+    call_id: String,
+) -> ResponseInputItem {
+    match parse_update_plan_arguments(arguments, &call_id) {
+        Ok(args) => {
+            let output = ResponseInputItem::FunctionCallOutput {
+                call_id,
+                output: FunctionCallOutputPayload {
+                    content: "Plan updated".to_string(),
+                    success: Some(true),
+                },
+            };
+            session
+                .send_event(Event {
+                    id: sub_id.to_string(),
+                    msg: EventMsg::PlanUpdate(args),
+                })
+                .await;
+            output
+        }
+        Err(output) => *output,
+    }
+}
+
+fn parse_update_plan_arguments(
+    arguments: String,
+    call_id: &str,
+) -> Result<UpdatePlanArgs, Box<ResponseInputItem>> {
+    match serde_json::from_str::<UpdatePlanArgs>(&arguments) {
+        Ok(args) => Ok(args),
+        Err(e) => {
+            let output = ResponseInputItem::FunctionCallOutput {
+                call_id: call_id.to_string(),
+                output: FunctionCallOutputPayload {
+                    content: format!("failed to parse function arguments: {e}"),
+                    success: None,
+                },
+            };
+            Err(Box::new(output))
+        }
+    }
+}
--- a/codex-rs/core/src/protocol.rs
+++ b/codex-rs/core/src/protocol.rs
@@ -19,6 +19,7 @@ use crate::config_types::ReasoningEffort as ReasoningEffortConfig;
 use crate::config_types::ReasoningSummary as ReasoningSummaryConfig;
 use crate::message_history::HistoryEntry;
 use crate::model_provider_info::ModelProviderInfo;
+use crate::plan_tool::UpdatePlanArgs;

 /// Submission Queue Entry - requests from user
 #[derive(Debug, Clone, Deserialize, Serialize)]
@@ -335,6 +336,8 @@ pub enum EventMsg {
    /// Response to GetHistoryEntryRequest.
    GetHistoryEntryResponse(GetHistoryEntryResponseEvent),

+    PlanUpdate(UpdatePlanArgs),
+
    /// Notification that the agent is shutting down.
    ShutdownComplete,
 }
--- a/codex-rs/exec/src/event_processor_with_human_output.rs
+++ b/codex-rs/exec/src/event_processor_with_human_output.rs
@@ -1,5 +1,6 @@
 use codex_common::elapsed::format_elapsed;
 use codex_core::config::Config;
+use codex_core::plan_tool::UpdatePlanArgs;
 use codex_core::protocol::AgentMessageDeltaEvent;
 use codex_core::protocol::AgentMessageEvent;
 use codex_core::protocol::AgentReasoningDeltaEvent;
@@ -513,6 +514,11 @@ impl EventProcessor for EventProcessorWithHumanOutput {
                ts_println!(self, "model: {}", model);
                println!();
            }
+            EventMsg::PlanUpdate(plan_update_event) => {
+                let UpdatePlanArgs { explanation, plan } = plan_update_event;
+                ts_println!(self, "explanation: {explanation:?}");
+                ts_println!(self, "plan: {plan:?}");
+            }
            EventMsg::GetHistoryEntryResponse(_) => {
                // Currently ignored in exec output.
            }
--- a/codex-rs/exec/src/lib.rs
+++ b/codex-rs/exec/src/lib.rs
@@ -126,6 +126,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
        model_provider: None,
        codex_linux_sandbox_exe,
        base_instructions: None,
+        include_plan_tool: None,
    };
    // Parse `-c` overrides.
    let cli_kv_overrides = match config_overrides.parse_overrides() {
--- a/codex-rs/mcp-server/src/codex_tool_config.rs
+++ b/codex-rs/mcp-server/src/codex_tool_config.rs
@@ -50,6 +50,10 @@ pub struct CodexToolCallParam {
    /// The set of instructions to use instead of the default ones.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub base_instructions: Option<String>,
+
+    /// Whether to include the plan tool in the conversation.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub include_plan_tool: Option<bool>,
 }

 /// Custom enum mirroring [`AskForApproval`], but has an extra dependency on
@@ -140,9 +144,10 @@ impl CodexToolCallParam {
            sandbox,
            config: cli_overrides,
            base_instructions,
+            include_plan_tool,
        } = self;

-        // Build the `ConfigOverrides` recognised by codex-core.
+        // Build the `ConfigOverrides` recognized by codex-core.
        let overrides = codex_core::config::ConfigOverrides {
            model,
            config_profile: profile,
@@ -152,6 +157,7 @@ impl CodexToolCallParam {
            model_provider: None,
            codex_linux_sandbox_exe,
            base_instructions,
+            include_plan_tool,
        };

        let cli_overrides = cli_overrides
@@ -262,6 +268,10 @@ mod tests {
                "description": "Working directory for the session. If relative, it is resolved against the server process's current working directory.",
                "type": "string"
              },
+              "include-plan-tool": {
+                "description": "Whether to include the plan tool in the conversation.",
+                "type": "boolean"
+              },
              "model": {
                "description": "Optional override for the model name (e.g. \"o3\", \"o4-mini\").",
                "type": "string"
--- a/codex-rs/mcp-server/src/codex_tool_runner.rs
+++ b/codex-rs/mcp-server/src/codex_tool_runner.rs
@@ -263,6 +263,7 @@ async fn run_codex_tool_session_inner(
                    | EventMsg::PatchApplyBegin(_)
                    | EventMsg::PatchApplyEnd(_)
                    | EventMsg::GetHistoryEntryResponse(_)
+                    | EventMsg::PlanUpdate(_)
                    | EventMsg::ShutdownComplete => {
                        // For now, we do not do anything extra for these
                        // events. Note that
--- a/codex-rs/mcp-server/tests/interrupt.rs
+++ b/codex-rs/mcp-server/tests/interrupt.rs
@@ -81,6 +81,7 @@ async fn shell_command_interruption() -> anyhow::Result<()> {
            sandbox: None,
            config: None,
            base_instructions: None,
+            include_plan_tool: None,
        })
        .await?;

--- a/codex-rs/tui/src/lib.rs
+++ b/codex-rs/tui/src/lib.rs
@@ -79,6 +79,7 @@ pub async fn run_main(
            config_profile: cli.config_profile.clone(),
            codex_linux_sandbox_exe,
            base_instructions: None,
+            include_plan_tool: None,
        };
        // Parse `-c` overrides from the CLI.
        let cli_kv_overrides = match cli.config_overrides.parse_overrides() {