From 8828f6f082c7e1f4144fa014f18d480851c7f34e Mon Sep 17 00:00:00 2001 From: Gabriel Peal Date: Tue, 29 Jul 2025 11:22:02 -0700 Subject: [PATCH] Add an experimental plan tool (#1726) This adds a tool the model can call to update a plan. The tool doesn't actually _do_ anything but it gives clients a chance to read and render the structured plan. We will likely iterate on the prompt and tools exposed for planning over time. --- codex-rs/core/src/chat_completions.rs | 3 +- codex-rs/core/src/client.rs | 7 +- codex-rs/core/src/codex.rs | 2 + codex-rs/core/src/config.rs | 12 +- codex-rs/core/src/lib.rs | 1 + codex-rs/core/src/openai_tools.rs | 18 ++- codex-rs/core/src/plan_tool.rs | 126 ++++++++++++++++++ codex-rs/core/src/protocol.rs | 3 + .../src/event_processor_with_human_output.rs | 6 + codex-rs/exec/src/lib.rs | 1 + codex-rs/mcp-server/src/codex_tool_config.rs | 12 +- codex-rs/mcp-server/src/codex_tool_runner.rs | 1 + codex-rs/mcp-server/tests/interrupt.rs | 1 + codex-rs/tui/src/lib.rs | 1 + 14 files changed, 184 insertions(+), 10 deletions(-) create mode 100644 codex-rs/core/src/plan_tool.rs diff --git a/codex-rs/core/src/chat_completions.rs b/codex-rs/core/src/chat_completions.rs index 5adf3c4d..3042ec45 100644 --- a/codex-rs/core/src/chat_completions.rs +++ b/codex-rs/core/src/chat_completions.rs @@ -30,6 +30,7 @@ use crate::util::backoff; pub(crate) async fn stream_chat_completions( prompt: &Prompt, model: &str, + include_plan_tool: bool, client: &reqwest::Client, provider: &ModelProviderInfo, ) -> Result { @@ -105,7 +106,7 @@ pub(crate) async fn stream_chat_completions( } } - let tools_json = create_tools_json_for_chat_completions_api(prompt, model)?; + let tools_json = create_tools_json_for_chat_completions_api(prompt, model, include_plan_tool)?; let payload = json!({ "model": model, "messages": messages, diff --git a/codex-rs/core/src/client.rs b/codex-rs/core/src/client.rs index 1648da6d..aa31b67e 100644 --- a/codex-rs/core/src/client.rs +++ 
b/codex-rs/core/src/client.rs @@ -77,6 +77,7 @@ impl ModelClient { let response_stream = stream_chat_completions( prompt, &self.config.model, + self.config.include_plan_tool, &self.client, &self.provider, ) @@ -115,7 +116,11 @@ impl ModelClient { } let full_instructions = prompt.get_full_instructions(&self.config.model); - let tools_json = create_tools_json_for_responses_api(prompt, &self.config.model)?; + let tools_json = create_tools_json_for_responses_api( + prompt, + &self.config.model, + self.config.include_plan_tool, + )?; let reasoning = create_reasoning_param_for_request(&self.config, self.effort, self.summary); // Request encrypted COT if we are not storing responses, diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 3ab3e8d7..6efc878f 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -55,6 +55,7 @@ use crate::models::ReasoningItemReasoningSummary; use crate::models::ResponseInputItem; use crate::models::ResponseItem; use crate::models::ShellToolCallParams; +use crate::plan_tool::handle_update_plan; use crate::project_doc::get_user_instructions; use crate::protocol::AgentMessageDeltaEvent; use crate::protocol::AgentMessageEvent; @@ -1336,6 +1337,7 @@ async fn handle_function_call( }; handle_container_exec_with_params(params, sess, sub_id, call_id).await } + "update_plan" => handle_update_plan(sess, arguments, sub_id, call_id).await, _ => { match sess.mcp_connection_manager.parse_tool_name(&name) { Some((server, tool_name)) => { diff --git a/codex-rs/core/src/config.rs b/codex-rs/core/src/config.rs index 57027bd0..53ca8d5b 100644 --- a/codex-rs/core/src/config.rs +++ b/codex-rs/core/src/config.rs @@ -143,6 +143,9 @@ pub struct Config { /// Experimental rollout resume path (absolute path to .jsonl; undocumented). pub experimental_resume: Option, + + /// Include an experimental plan tool that the model can use to update its current plan and status of each step. 
+ pub include_plan_tool: bool, } impl Config { @@ -366,6 +369,7 @@ pub struct ConfigOverrides { pub config_profile: Option, pub codex_linux_sandbox_exe: Option, pub base_instructions: Option, + pub include_plan_tool: Option, } impl Config { @@ -388,6 +392,7 @@ impl Config { config_profile: config_profile_key, codex_linux_sandbox_exe, base_instructions, + include_plan_tool, } = overrides; let config_profile = match config_profile_key.as_ref().or(cfg.profile.as_ref()) { @@ -521,8 +526,8 @@ impl Config { .chatgpt_base_url .or(cfg.chatgpt_base_url) .unwrap_or("https://chatgpt.com/backend-api/".to_string()), - experimental_resume, + include_plan_tool: include_plan_tool.unwrap_or(false), }; Ok(config) } @@ -829,7 +834,7 @@ disable_response_storage = true /// /// 1. custom command-line argument, e.g. `--model o3` /// 2. as part of a profile, where the `--profile` is specified via a CLI - /// (or in the config file itelf) + /// (or in the config file itself) /// 3. as an entry in `config.toml`, e.g. `model = "o3"` /// 4. 
the default value for a required field defined in code, e.g., /// `crate::flags::OPENAI_DEFAULT_MODEL` @@ -879,6 +884,7 @@ disable_response_storage = true chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(), experimental_resume: None, base_instructions: None, + include_plan_tool: false, }, o3_profile_config ); @@ -927,6 +933,7 @@ disable_response_storage = true chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(), experimental_resume: None, base_instructions: None, + include_plan_tool: false, }; assert_eq!(expected_gpt3_profile_config, gpt3_profile_config); @@ -990,6 +997,7 @@ disable_response_storage = true chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(), experimental_resume: None, base_instructions: None, + include_plan_tool: false, }; assert_eq!(expected_zdr_profile_config, zdr_profile_config); diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs index 6cb6aaa6..b2dbded5 100644 --- a/codex-rs/core/src/lib.rs +++ b/codex-rs/core/src/lib.rs @@ -34,6 +34,7 @@ mod models; pub mod openai_api_key; mod openai_model_info; mod openai_tools; +pub mod plan_tool; mod project_doc; pub mod protocol; mod rollout; diff --git a/codex-rs/core/src/openai_tools.rs b/codex-rs/core/src/openai_tools.rs index ef12a629..0f1e7d9c 100644 --- a/codex-rs/core/src/openai_tools.rs +++ b/codex-rs/core/src/openai_tools.rs @@ -4,13 +4,14 @@ use std::collections::BTreeMap; use std::sync::LazyLock; use crate::client_common::Prompt; +use crate::plan_tool::PLAN_TOOL; #[derive(Debug, Clone, Serialize)] pub(crate) struct ResponsesApiTool { - name: &'static str, - description: &'static str, - strict: bool, - parameters: JsonSchema, + pub(crate) name: &'static str, + pub(crate) description: &'static str, + pub(crate) strict: bool, + pub(crate) parameters: JsonSchema, } /// When serialized as JSON, this produces a valid "Tool" in the OpenAI @@ -74,6 +75,7 @@ static DEFAULT_CODEX_MODEL_TOOLS: LazyLock> = pub(crate) fn 
create_tools_json_for_responses_api( prompt: &Prompt, model: &str, + include_plan_tool: bool, ) -> crate::error::Result> { // Assemble tool list: built-in tools + any extra tools from the prompt. let default_tools = if model.starts_with("codex") { @@ -93,6 +95,10 @@ pub(crate) fn create_tools_json_for_responses_api( .map(|(name, tool)| mcp_tool_to_openai_tool(name, tool)), ); + if include_plan_tool { + tools_json.push(serde_json::to_value(PLAN_TOOL.clone())?); + } + Ok(tools_json) } @@ -102,10 +108,12 @@ pub(crate) fn create_tools_json_for_responses_api( pub(crate) fn create_tools_json_for_chat_completions_api( prompt: &Prompt, model: &str, + include_plan_tool: bool, ) -> crate::error::Result> { // We start with the JSON for the Responses API and than rewrite it to match // the chat completions tool call format. - let responses_api_tools_json = create_tools_json_for_responses_api(prompt, model)?; + let responses_api_tools_json = + create_tools_json_for_responses_api(prompt, model, include_plan_tool)?; let tools_json = responses_api_tools_json .into_iter() .filter_map(|mut tool| { diff --git a/codex-rs/core/src/plan_tool.rs b/codex-rs/core/src/plan_tool.rs new file mode 100644 index 00000000..dbddb8b5 --- /dev/null +++ b/codex-rs/core/src/plan_tool.rs @@ -0,0 +1,126 @@ +use std::collections::BTreeMap; +use std::sync::LazyLock; + +use serde::Deserialize; +use serde::Serialize; + +use crate::codex::Session; +use crate::models::FunctionCallOutputPayload; +use crate::models::ResponseInputItem; +use crate::openai_tools::JsonSchema; +use crate::openai_tools::OpenAiTool; +use crate::openai_tools::ResponsesApiTool; +use crate::protocol::Event; +use crate::protocol::EventMsg; + +// Types for the TODO tool arguments matching codex-vscode/todo-mcp/src/main.rs +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum StepStatus { + Pending, + InProgress, + Completed, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] 
+#[serde(deny_unknown_fields)] +pub struct PlanItemArg { + pub step: String, + pub status: StepStatus, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct UpdatePlanArgs { + #[serde(default)] + pub explanation: Option<String>, + pub plan: Vec<PlanItemArg>, +} + +pub(crate) static PLAN_TOOL: LazyLock<OpenAiTool> = LazyLock::new(|| { + let mut plan_item_props = BTreeMap::new(); + plan_item_props.insert("step".to_string(), JsonSchema::String); + plan_item_props.insert("status".to_string(), JsonSchema::String); + + let plan_items_schema = JsonSchema::Array { + items: Box::new(JsonSchema::Object { + properties: plan_item_props, + required: &["step", "status"], + additional_properties: false, + }), + }; + + let mut properties = BTreeMap::new(); + properties.insert("explanation".to_string(), JsonSchema::String); + properties.insert("plan".to_string(), plan_items_schema); + + OpenAiTool::Function(ResponsesApiTool { + name: "update_plan", + description: r#"Use the update_plan tool to keep the user updated on the current plan for the task. +After understanding the user's task, call the update_plan tool with an initial plan. An example of a plan: +1. Explore the codebase to find relevant files (status: in_progress) +2. Implement the feature in the XYZ component (status: pending) +3. Commit changes and make a pull request (status: pending) +Each step should be a short, 1-sentence description. +Until all the steps are finished, there should always be exactly one in_progress step in the plan. +Call the update_plan tool whenever you finish a step, marking the completed step as `completed` and marking the next step as `in_progress`. +Before running a command, consider whether or not you have completed the previous step, and make sure to mark it as completed before moving on to the next step. +Sometimes, you may need to change plans in the middle of a task: call `update_plan` with the updated plan and make sure to provide an `explanation` of the rationale when doing so. 
+When all steps are completed, call update_plan one last time with all steps marked as `completed`."#, + strict: false, + parameters: JsonSchema::Object { + properties, + required: &["plan"], + additional_properties: false, + }, + }) +}); + +/// This function doesn't do anything useful. However, it gives the model a structured way to record its plan that clients can read and render. +/// So it's the _inputs_ to this function that are useful to clients, not the outputs, and neither is actually useful to the model other +/// than forcing it to come up with and document a plan (TBD how that affects performance). +pub(crate) async fn handle_update_plan( + session: &Session, + arguments: String, + sub_id: String, + call_id: String, +) -> ResponseInputItem { + match parse_update_plan_arguments(arguments, &call_id) { + Ok(args) => { + let output = ResponseInputItem::FunctionCallOutput { + call_id, + output: FunctionCallOutputPayload { + content: "Plan updated".to_string(), + success: Some(true), + }, + }; + session + .send_event(Event { + id: sub_id.to_string(), + msg: EventMsg::PlanUpdate(args), + }) + .await; + output + } + Err(output) => *output, + } +} + +fn parse_update_plan_arguments( + arguments: String, + call_id: &str, +) -> Result<UpdatePlanArgs, Box<ResponseInputItem>> { + match serde_json::from_str::<UpdatePlanArgs>(&arguments) { + Ok(args) => Ok(args), + Err(e) => { + let output = ResponseInputItem::FunctionCallOutput { + call_id: call_id.to_string(), + output: FunctionCallOutputPayload { + content: format!("failed to parse function arguments: {e}"), + success: None, + }, + }; + Err(Box::new(output)) + } + } +} diff --git a/codex-rs/core/src/protocol.rs b/codex-rs/core/src/protocol.rs index 22bc1809..041a8c58 100644 --- a/codex-rs/core/src/protocol.rs +++ b/codex-rs/core/src/protocol.rs @@ -19,6 +19,7 @@ use crate::config_types::ReasoningEffort as ReasoningEffortConfig; use crate::config_types::ReasoningSummary as ReasoningSummaryConfig; use crate::message_history::HistoryEntry; use 
crate::model_provider_info::ModelProviderInfo; +use crate::plan_tool::UpdatePlanArgs; /// Submission Queue Entry - requests from user #[derive(Debug, Clone, Deserialize, Serialize)] @@ -335,6 +336,8 @@ pub enum EventMsg { /// Response to GetHistoryEntryRequest. GetHistoryEntryResponse(GetHistoryEntryResponseEvent), + PlanUpdate(UpdatePlanArgs), + /// Notification that the agent is shutting down. ShutdownComplete, } diff --git a/codex-rs/exec/src/event_processor_with_human_output.rs b/codex-rs/exec/src/event_processor_with_human_output.rs index bc647c68..6c3f73f0 100644 --- a/codex-rs/exec/src/event_processor_with_human_output.rs +++ b/codex-rs/exec/src/event_processor_with_human_output.rs @@ -1,5 +1,6 @@ use codex_common::elapsed::format_elapsed; use codex_core::config::Config; +use codex_core::plan_tool::UpdatePlanArgs; use codex_core::protocol::AgentMessageDeltaEvent; use codex_core::protocol::AgentMessageEvent; use codex_core::protocol::AgentReasoningDeltaEvent; @@ -513,6 +514,11 @@ impl EventProcessor for EventProcessorWithHumanOutput { ts_println!(self, "model: {}", model); println!(); } + EventMsg::PlanUpdate(plan_update_event) => { + let UpdatePlanArgs { explanation, plan } = plan_update_event; + ts_println!(self, "explanation: {explanation:?}"); + ts_println!(self, "plan: {plan:?}"); + } EventMsg::GetHistoryEntryResponse(_) => { // Currently ignored in exec output. } diff --git a/codex-rs/exec/src/lib.rs b/codex-rs/exec/src/lib.rs index cf2f2bd6..ce4d7f65 100644 --- a/codex-rs/exec/src/lib.rs +++ b/codex-rs/exec/src/lib.rs @@ -126,6 +126,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option) -> any model_provider: None, codex_linux_sandbox_exe, base_instructions: None, + include_plan_tool: None, }; // Parse `-c` overrides. 
let cli_kv_overrides = match config_overrides.parse_overrides() { diff --git a/codex-rs/mcp-server/src/codex_tool_config.rs b/codex-rs/mcp-server/src/codex_tool_config.rs index 9f6f7a78..877d0e05 100644 --- a/codex-rs/mcp-server/src/codex_tool_config.rs +++ b/codex-rs/mcp-server/src/codex_tool_config.rs @@ -50,6 +50,10 @@ pub struct CodexToolCallParam { /// The set of instructions to use instead of the default ones. #[serde(default, skip_serializing_if = "Option::is_none")] pub base_instructions: Option, + + /// Whether to include the plan tool in the conversation. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub include_plan_tool: Option, } /// Custom enum mirroring [`AskForApproval`], but has an extra dependency on @@ -140,9 +144,10 @@ impl CodexToolCallParam { sandbox, config: cli_overrides, base_instructions, + include_plan_tool, } = self; - // Build the `ConfigOverrides` recognised by codex-core. + // Build the `ConfigOverrides` recognized by codex-core. let overrides = codex_core::config::ConfigOverrides { model, config_profile: profile, @@ -152,6 +157,7 @@ impl CodexToolCallParam { model_provider: None, codex_linux_sandbox_exe, base_instructions, + include_plan_tool, }; let cli_overrides = cli_overrides @@ -262,6 +268,10 @@ mod tests { "description": "Working directory for the session. If relative, it is resolved against the server process's current working directory.", "type": "string" }, + "include-plan-tool": { + "description": "Whether to include the plan tool in the conversation.", + "type": "boolean" + }, "model": { "description": "Optional override for the model name (e.g. 
\"o3\", \"o4-mini\").", "type": "string" diff --git a/codex-rs/mcp-server/src/codex_tool_runner.rs b/codex-rs/mcp-server/src/codex_tool_runner.rs index c3cb39c4..f25659b2 100644 --- a/codex-rs/mcp-server/src/codex_tool_runner.rs +++ b/codex-rs/mcp-server/src/codex_tool_runner.rs @@ -263,6 +263,7 @@ async fn run_codex_tool_session_inner( | EventMsg::PatchApplyBegin(_) | EventMsg::PatchApplyEnd(_) | EventMsg::GetHistoryEntryResponse(_) + | EventMsg::PlanUpdate(_) | EventMsg::ShutdownComplete => { // For now, we do not do anything extra for these // events. Note that diff --git a/codex-rs/mcp-server/tests/interrupt.rs b/codex-rs/mcp-server/tests/interrupt.rs index cd163ea0..313bc7af 100644 --- a/codex-rs/mcp-server/tests/interrupt.rs +++ b/codex-rs/mcp-server/tests/interrupt.rs @@ -81,6 +81,7 @@ async fn shell_command_interruption() -> anyhow::Result<()> { sandbox: None, config: None, base_instructions: None, + include_plan_tool: None, }) .await?; diff --git a/codex-rs/tui/src/lib.rs b/codex-rs/tui/src/lib.rs index f93c0a2b..6c6c6621 100644 --- a/codex-rs/tui/src/lib.rs +++ b/codex-rs/tui/src/lib.rs @@ -79,6 +79,7 @@ pub async fn run_main( config_profile: cli.config_profile.clone(), codex_linux_sandbox_exe, base_instructions: None, + include_plan_tool: None, }; // Parse `-c` overrides from the CLI. let cli_kv_overrides = match cli.config_overrides.parse_overrides() {