Add an experimental plan tool (#1726)

This adds a tool the model can call to update a plan. The tool doesn't
actually _do_ anything, but it gives clients a chance to read and render
the structured plan. We will likely iterate on the prompt and tools
exposed for planning over time.
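
For illustration, a single update_plan call might carry arguments like the following (a hypothetical payload matching the UpdatePlanArgs schema introduced in plan_tool.rs below; the step text mirrors the example in the tool description):

use serde_json::json;

// Hypothetical arguments for one `update_plan` call. Status values are
// snake_case ("pending", "in_progress", "completed") per StepStatus below.
let args = json!({
    "explanation": "Starting with exploration before implementing.",
    "plan": [
        { "step": "Explore the codebase to find relevant files", "status": "in_progress" },
        { "step": "Implement the feature in the XYZ component", "status": "pending" },
        { "step": "Commit changes and make a pull request", "status": "pending" }
    ]
});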
Gabriel Peal, 2025-07-29 11:22:02 -07:00 (committed by GitHub)
parent f8fcaaaf6f, commit 8828f6f082
14 changed files with 184 additions and 10 deletions


@@ -30,6 +30,7 @@ use crate::util::backoff;
 pub(crate) async fn stream_chat_completions(
     prompt: &Prompt,
     model: &str,
+    include_plan_tool: bool,
     client: &reqwest::Client,
     provider: &ModelProviderInfo,
 ) -> Result<ResponseStream> {
@@ -105,7 +106,7 @@ pub(crate) async fn stream_chat_completions(
         }
     }

-    let tools_json = create_tools_json_for_chat_completions_api(prompt, model)?;
+    let tools_json = create_tools_json_for_chat_completions_api(prompt, model, include_plan_tool)?;
     let payload = json!({
         "model": model,
         "messages": messages,


@@ -77,6 +77,7 @@ impl ModelClient {
         let response_stream = stream_chat_completions(
             prompt,
             &self.config.model,
+            self.config.include_plan_tool,
             &self.client,
             &self.provider,
         )
@@ -115,7 +116,11 @@ impl ModelClient {
        }

         let full_instructions = prompt.get_full_instructions(&self.config.model);
-        let tools_json = create_tools_json_for_responses_api(prompt, &self.config.model)?;
+        let tools_json = create_tools_json_for_responses_api(
+            prompt,
+            &self.config.model,
+            self.config.include_plan_tool,
+        )?;
         let reasoning = create_reasoning_param_for_request(&self.config, self.effort, self.summary);

         // Request encrypted COT if we are not storing responses,


@@ -55,6 +55,7 @@ use crate::models::ReasoningItemReasoningSummary;
 use crate::models::ResponseInputItem;
 use crate::models::ResponseItem;
 use crate::models::ShellToolCallParams;
+use crate::plan_tool::handle_update_plan;
 use crate::project_doc::get_user_instructions;
 use crate::protocol::AgentMessageDeltaEvent;
 use crate::protocol::AgentMessageEvent;
@@ -1336,6 +1337,7 @@ async fn handle_function_call(
             };
             handle_container_exec_with_params(params, sess, sub_id, call_id).await
         }
+        "update_plan" => handle_update_plan(sess, arguments, sub_id, call_id).await,
         _ => {
             match sess.mcp_connection_manager.parse_tool_name(&name) {
                 Some((server, tool_name)) => {


@@ -143,6 +143,9 @@ pub struct Config {

     /// Experimental rollout resume path (absolute path to .jsonl; undocumented).
     pub experimental_resume: Option<PathBuf>,
+
+    /// Include an experimental plan tool that the model can use to update its current plan and status of each step.
+    pub include_plan_tool: bool,
 }

 impl Config {
@@ -366,6 +369,7 @@ pub struct ConfigOverrides {
     pub config_profile: Option<String>,
     pub codex_linux_sandbox_exe: Option<PathBuf>,
     pub base_instructions: Option<String>,
+    pub include_plan_tool: Option<bool>,
 }

 impl Config {
@@ -388,6 +392,7 @@ impl Config {
             config_profile: config_profile_key,
             codex_linux_sandbox_exe,
             base_instructions,
+            include_plan_tool,
         } = overrides;

         let config_profile = match config_profile_key.as_ref().or(cfg.profile.as_ref()) {
@@ -521,8 +526,8 @@ impl Config {
                 .chatgpt_base_url
                 .or(cfg.chatgpt_base_url)
                 .unwrap_or("https://chatgpt.com/backend-api/".to_string()),
             experimental_resume,
-
+            include_plan_tool: include_plan_tool.unwrap_or(false),
         };
         Ok(config)
     }
@@ -829,7 +834,7 @@ disable_response_storage = true
     ///
     /// 1. custom command-line argument, e.g. `--model o3`
     /// 2. as part of a profile, where the `--profile` is specified via a CLI
-    ///    (or in the config file itelf)
+    ///    (or in the config file itself)
     /// 3. as an entry in `config.toml`, e.g. `model = "o3"`
     /// 4. the default value for a required field defined in code, e.g.,
     ///    `crate::flags::OPENAI_DEFAULT_MODEL`
@@ -879,6 +884,7 @@ disable_response_storage = true
                 chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
                 experimental_resume: None,
                 base_instructions: None,
+                include_plan_tool: false,
             },
             o3_profile_config
         );
@@ -927,6 +933,7 @@ disable_response_storage = true
             chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
             experimental_resume: None,
             base_instructions: None,
+            include_plan_tool: false,
         };

         assert_eq!(expected_gpt3_profile_config, gpt3_profile_config);
@@ -990,6 +997,7 @@ disable_response_storage = true
             chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
             experimental_resume: None,
             base_instructions: None,
+            include_plan_tool: false,
         };

         assert_eq!(expected_zdr_profile_config, zdr_profile_config);


@@ -34,6 +34,7 @@ mod models;
 pub mod openai_api_key;
 mod openai_model_info;
 mod openai_tools;
+pub mod plan_tool;
 mod project_doc;
 pub mod protocol;
 mod rollout;


@@ -4,13 +4,14 @@ use std::collections::BTreeMap;
 use std::sync::LazyLock;

 use crate::client_common::Prompt;
+use crate::plan_tool::PLAN_TOOL;

 #[derive(Debug, Clone, Serialize)]
 pub(crate) struct ResponsesApiTool {
-    name: &'static str,
-    description: &'static str,
-    strict: bool,
-    parameters: JsonSchema,
+    pub(crate) name: &'static str,
+    pub(crate) description: &'static str,
+    pub(crate) strict: bool,
+    pub(crate) parameters: JsonSchema,
 }

 /// When serialized as JSON, this produces a valid "Tool" in the OpenAI
@@ -74,6 +75,7 @@ static DEFAULT_CODEX_MODEL_TOOLS: LazyLock<Vec<OpenAiTool>> =
 pub(crate) fn create_tools_json_for_responses_api(
     prompt: &Prompt,
     model: &str,
+    include_plan_tool: bool,
 ) -> crate::error::Result<Vec<serde_json::Value>> {
     // Assemble tool list: built-in tools + any extra tools from the prompt.
     let default_tools = if model.starts_with("codex") {
@@ -93,6 +95,10 @@ pub(crate) fn create_tools_json_for_responses_api(
             .map(|(name, tool)| mcp_tool_to_openai_tool(name, tool)),
     );

+    if include_plan_tool {
+        tools_json.push(serde_json::to_value(PLAN_TOOL.clone())?);
+    }
+
     Ok(tools_json)
 }

@@ -102,10 +108,12 @@ pub(crate) fn create_tools_json_for_responses_api(
 pub(crate) fn create_tools_json_for_chat_completions_api(
     prompt: &Prompt,
     model: &str,
+    include_plan_tool: bool,
 ) -> crate::error::Result<Vec<serde_json::Value>> {
     // We start with the JSON for the Responses API and than rewrite it to match
     // the chat completions tool call format.
-    let responses_api_tools_json = create_tools_json_for_responses_api(prompt, model)?;
+    let responses_api_tools_json =
+        create_tools_json_for_responses_api(prompt, model, include_plan_tool)?;
     let tools_json = responses_api_tools_json
         .into_iter()
         .filter_map(|mut tool| {
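
For reference, the plan tool in Responses-API form serializes roughly as sketched below (inferred from ResponsesApiTool's fields and the JsonSchema values in this file; the long description string is elided). The chat-completions path then rewrites each entry into the nested {"type": "function", "function": {...}} shape that API expects.

use serde_json::json;

// Approximate Responses-API serialization of PLAN_TOOL (description elided).
let plan_tool_json = json!({
    "type": "function",
    "name": "update_plan",
    "strict": false,
    "parameters": {
        "type": "object",
        "properties": {
            "explanation": { "type": "string" },
            "plan": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "step": { "type": "string" },
                        "status": { "type": "string" }
                    },
                    "required": ["step", "status"],
                    "additionalProperties": false
                }
            }
        },
        "required": ["plan"],
        "additionalProperties": false
    }
});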


@@ -0,0 +1,126 @@
+use std::collections::BTreeMap;
+use std::sync::LazyLock;
+
+use serde::Deserialize;
+use serde::Serialize;
+
+use crate::codex::Session;
+use crate::models::FunctionCallOutputPayload;
+use crate::models::ResponseInputItem;
+use crate::openai_tools::JsonSchema;
+use crate::openai_tools::OpenAiTool;
+use crate::openai_tools::ResponsesApiTool;
+use crate::protocol::Event;
+use crate::protocol::EventMsg;
+
+// Types for the TODO tool arguments matching codex-vscode/todo-mcp/src/main.rs
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum StepStatus {
+    Pending,
+    InProgress,
+    Completed,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(deny_unknown_fields)]
+pub struct PlanItemArg {
+    pub step: String,
+    pub status: StepStatus,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(deny_unknown_fields)]
+pub struct UpdatePlanArgs {
+    #[serde(default)]
+    pub explanation: Option<String>,
+    pub plan: Vec<PlanItemArg>,
+}
+
+pub(crate) static PLAN_TOOL: LazyLock<OpenAiTool> = LazyLock::new(|| {
+    let mut plan_item_props = BTreeMap::new();
+    plan_item_props.insert("step".to_string(), JsonSchema::String);
+    plan_item_props.insert("status".to_string(), JsonSchema::String);
+
+    let plan_items_schema = JsonSchema::Array {
+        items: Box::new(JsonSchema::Object {
+            properties: plan_item_props,
+            required: &["step", "status"],
+            additional_properties: false,
+        }),
+    };
+
+    let mut properties = BTreeMap::new();
+    properties.insert("explanation".to_string(), JsonSchema::String);
+    properties.insert("plan".to_string(), plan_items_schema);
+
+    OpenAiTool::Function(ResponsesApiTool {
+        name: "update_plan",
+        description: r#"Use the update_plan tool to keep the user updated on the current plan for the task.
+After understanding the user's task, call the update_plan tool with an initial plan. An example of a plan:
+1. Explore the codebase to find relevant files (status: in_progress)
+2. Implement the feature in the XYZ component (status: pending)
+3. Commit changes and make a pull request (status: pending)
+Each step should be a short, 1-sentence description.
+Until all the steps are finished, there should always be exactly one in_progress step in the plan.
+Call the update_plan tool whenever you finish a step, marking the completed step as `completed` and marking the next step as `in_progress`.
+Before running a command, consider whether or not you have completed the previous step, and make sure to mark it as completed before moving on to the next step.
+Sometimes, you may need to change plans in the middle of a task: call `update_plan` with the updated plan and make sure to provide an `explanation` of the rationale when doing so.
+When all steps are completed, call update_plan one last time with all steps marked as `completed`."#,
+        strict: false,
+        parameters: JsonSchema::Object {
+            properties,
+            required: &["plan"],
+            additional_properties: false,
+        },
+    })
+});
+
+/// This function doesn't do anything useful. However, it gives the model a structured way to record its plan that clients can read and render.
+/// So it's the _inputs_ to this function that are useful to clients, not the outputs and neither are actually useful for the model other
+/// than forcing it to come up and document a plan (TBD how that affects performance).
+pub(crate) async fn handle_update_plan(
+    session: &Session,
+    arguments: String,
+    sub_id: String,
+    call_id: String,
+) -> ResponseInputItem {
+    match parse_update_plan_arguments(arguments, &call_id) {
+        Ok(args) => {
+            let output = ResponseInputItem::FunctionCallOutput {
+                call_id,
+                output: FunctionCallOutputPayload {
+                    content: "Plan updated".to_string(),
+                    success: Some(true),
+                },
+            };
+            session
+                .send_event(Event {
+                    id: sub_id.to_string(),
+                    msg: EventMsg::PlanUpdate(args),
+                })
+                .await;
+            output
+        }
+        Err(output) => *output,
+    }
+}
+
+fn parse_update_plan_arguments(
+    arguments: String,
+    call_id: &str,
+) -> Result<UpdatePlanArgs, Box<ResponseInputItem>> {
+    match serde_json::from_str::<UpdatePlanArgs>(&arguments) {
+        Ok(args) => Ok(args),
+        Err(e) => {
+            let output = ResponseInputItem::FunctionCallOutput {
+                call_id: call_id.to_string(),
+                output: FunctionCallOutputPayload {
+                    content: format!("failed to parse function arguments: {e}"),
+                    success: None,
+                },
+            };
+            Err(Box::new(output))
+        }
+    }
+}
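
A quick sanity check of the parsing path above (a minimal sketch, e.g. a unit test in this file; it relies only on the serde derives shown here):

// Valid payload: statuses are snake_case, and `explanation` may be omitted
// thanks to #[serde(default)].
let args: UpdatePlanArgs = serde_json::from_str(
    r#"{"plan":[{"step":"Explore the codebase","status":"in_progress"}]}"#,
)
.expect("well-formed arguments parse");
assert!(matches!(args.plan[0].status, StepStatus::InProgress));

// Unknown keys are rejected by #[serde(deny_unknown_fields)]; this is the
// case that produces the "failed to parse function arguments" output above.
assert!(serde_json::from_str::<UpdatePlanArgs>(r#"{"plan":[],"extra":true}"#).is_err());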


@@ -19,6 +19,7 @@ use crate::config_types::ReasoningEffort as ReasoningEffortConfig;
 use crate::config_types::ReasoningSummary as ReasoningSummaryConfig;
 use crate::message_history::HistoryEntry;
 use crate::model_provider_info::ModelProviderInfo;
+use crate::plan_tool::UpdatePlanArgs;

 /// Submission Queue Entry - requests from user
 #[derive(Debug, Clone, Deserialize, Serialize)]
@@ -335,6 +336,8 @@ pub enum EventMsg {
     /// Response to GetHistoryEntryRequest.
     GetHistoryEntryResponse(GetHistoryEntryResponseEvent),

+    PlanUpdate(UpdatePlanArgs),
+
     /// Notification that the agent is shutting down.
     ShutdownComplete,
 }
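
Clients consume the new variant by matching on EventMsg; a minimal rendering sketch (the render_plan helper is hypothetical, not part of this commit):

use codex_core::plan_tool::{StepStatus, UpdatePlanArgs};

// Hypothetical client-side rendering of a plan update event.
fn render_plan(args: &UpdatePlanArgs) -> String {
    args.plan
        .iter()
        .map(|item| {
            let marker = match item.status {
                StepStatus::Pending => "[ ]",
                StepStatus::InProgress => "[~]",
                StepStatus::Completed => "[x]",
            };
            format!("{marker} {}", item.step)
        })
        .collect::<Vec<_>>()
        .join("\n")
}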


@@ -1,5 +1,6 @@
 use codex_common::elapsed::format_elapsed;
 use codex_core::config::Config;
+use codex_core::plan_tool::UpdatePlanArgs;
 use codex_core::protocol::AgentMessageDeltaEvent;
 use codex_core::protocol::AgentMessageEvent;
 use codex_core::protocol::AgentReasoningDeltaEvent;
@@ -513,6 +514,11 @@ impl EventProcessor for EventProcessorWithHumanOutput {
                 ts_println!(self, "model: {}", model);
                 println!();
             }
+            EventMsg::PlanUpdate(plan_update_event) => {
+                let UpdatePlanArgs { explanation, plan } = plan_update_event;
+                ts_println!(self, "explanation: {explanation:?}");
+                ts_println!(self, "plan: {plan:?}");
+            }
             EventMsg::GetHistoryEntryResponse(_) => {
                 // Currently ignored in exec output.
             }


@@ -126,6 +126,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
         model_provider: None,
         codex_linux_sandbox_exe,
         base_instructions: None,
+        include_plan_tool: None,
     };
     // Parse `-c` overrides.
     let cli_kv_overrides = match config_overrides.parse_overrides() {


@@ -50,6 +50,10 @@ pub struct CodexToolCallParam {
     /// The set of instructions to use instead of the default ones.
     #[serde(default, skip_serializing_if = "Option::is_none")]
     pub base_instructions: Option<String>,
+
+    /// Whether to include the plan tool in the conversation.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub include_plan_tool: Option<bool>,
 }

 /// Custom enum mirroring [`AskForApproval`], but has an extra dependency on
@@ -140,9 +144,10 @@ impl CodexToolCallParam {
             sandbox,
             config: cli_overrides,
             base_instructions,
+            include_plan_tool,
         } = self;

-        // Build the `ConfigOverrides` recognised by codex-core.
+        // Build the `ConfigOverrides` recognized by codex-core.
         let overrides = codex_core::config::ConfigOverrides {
             model,
             config_profile: profile,
@@ -152,6 +157,7 @@ impl CodexToolCallParam {
             model_provider: None,
             codex_linux_sandbox_exe,
             base_instructions,
+            include_plan_tool,
         };

         let cli_overrides = cli_overrides
@@ -262,6 +268,10 @@ mod tests {
         "description": "Working directory for the session. If relative, it is resolved against the server process's current working directory.",
         "type": "string"
       },
+      "include-plan-tool": {
+        "description": "Whether to include the plan tool in the conversation.",
+        "type": "boolean"
+      },
       "model": {
         "description": "Optional override for the model name (e.g. \"o3\", \"o4-mini\").",
         "type": "string"


@@ -263,6 +263,7 @@ async fn run_codex_tool_session_inner(
             | EventMsg::PatchApplyBegin(_)
             | EventMsg::PatchApplyEnd(_)
             | EventMsg::GetHistoryEntryResponse(_)
+            | EventMsg::PlanUpdate(_)
             | EventMsg::ShutdownComplete => {
                 // For now, we do not do anything extra for these
                 // events. Note that


@@ -81,6 +81,7 @@ async fn shell_command_interruption() -> anyhow::Result<()> {
         sandbox: None,
         config: None,
         base_instructions: None,
+        include_plan_tool: None,
     })
     .await?;



@@ -79,6 +79,7 @@ pub async fn run_main(
         config_profile: cli.config_profile.clone(),
         codex_linux_sandbox_exe,
         base_instructions: None,
+        include_plan_tool: None,
     };
     // Parse `-c` overrides from the CLI.
     let cli_kv_overrides = match cli.config_overrides.parse_overrides() {