Add an experimental plan tool (#1726)
This adds a tool the model can call to update a plan. The tool doesn't actually _do_ anything but it gives clients a chance to read and render the structured plan. We will likely iterate on the prompt and tools exposed for planning over time.
This commit is contained in:
@@ -30,6 +30,7 @@ use crate::util::backoff;
|
||||
pub(crate) async fn stream_chat_completions(
|
||||
prompt: &Prompt,
|
||||
model: &str,
|
||||
include_plan_tool: bool,
|
||||
client: &reqwest::Client,
|
||||
provider: &ModelProviderInfo,
|
||||
) -> Result<ResponseStream> {
|
||||
@@ -105,7 +106,7 @@ pub(crate) async fn stream_chat_completions(
|
||||
}
|
||||
}
|
||||
|
||||
let tools_json = create_tools_json_for_chat_completions_api(prompt, model)?;
|
||||
let tools_json = create_tools_json_for_chat_completions_api(prompt, model, include_plan_tool)?;
|
||||
let payload = json!({
|
||||
"model": model,
|
||||
"messages": messages,
|
||||
|
||||
@@ -77,6 +77,7 @@ impl ModelClient {
|
||||
let response_stream = stream_chat_completions(
|
||||
prompt,
|
||||
&self.config.model,
|
||||
self.config.include_plan_tool,
|
||||
&self.client,
|
||||
&self.provider,
|
||||
)
|
||||
@@ -115,7 +116,11 @@ impl ModelClient {
|
||||
}
|
||||
|
||||
let full_instructions = prompt.get_full_instructions(&self.config.model);
|
||||
let tools_json = create_tools_json_for_responses_api(prompt, &self.config.model)?;
|
||||
let tools_json = create_tools_json_for_responses_api(
|
||||
prompt,
|
||||
&self.config.model,
|
||||
self.config.include_plan_tool,
|
||||
)?;
|
||||
let reasoning = create_reasoning_param_for_request(&self.config, self.effort, self.summary);
|
||||
|
||||
// Request encrypted COT if we are not storing responses,
|
||||
|
||||
@@ -55,6 +55,7 @@ use crate::models::ReasoningItemReasoningSummary;
|
||||
use crate::models::ResponseInputItem;
|
||||
use crate::models::ResponseItem;
|
||||
use crate::models::ShellToolCallParams;
|
||||
use crate::plan_tool::handle_update_plan;
|
||||
use crate::project_doc::get_user_instructions;
|
||||
use crate::protocol::AgentMessageDeltaEvent;
|
||||
use crate::protocol::AgentMessageEvent;
|
||||
@@ -1336,6 +1337,7 @@ async fn handle_function_call(
|
||||
};
|
||||
handle_container_exec_with_params(params, sess, sub_id, call_id).await
|
||||
}
|
||||
"update_plan" => handle_update_plan(sess, arguments, sub_id, call_id).await,
|
||||
_ => {
|
||||
match sess.mcp_connection_manager.parse_tool_name(&name) {
|
||||
Some((server, tool_name)) => {
|
||||
|
||||
@@ -143,6 +143,9 @@ pub struct Config {
|
||||
|
||||
/// Experimental rollout resume path (absolute path to .jsonl; undocumented).
|
||||
pub experimental_resume: Option<PathBuf>,
|
||||
|
||||
/// Include an experimental plan tool that the model can use to update its current plan and status of each step.
|
||||
pub include_plan_tool: bool,
|
||||
}
|
||||
|
||||
impl Config {
|
||||
@@ -366,6 +369,7 @@ pub struct ConfigOverrides {
|
||||
pub config_profile: Option<String>,
|
||||
pub codex_linux_sandbox_exe: Option<PathBuf>,
|
||||
pub base_instructions: Option<String>,
|
||||
pub include_plan_tool: Option<bool>,
|
||||
}
|
||||
|
||||
impl Config {
|
||||
@@ -388,6 +392,7 @@ impl Config {
|
||||
config_profile: config_profile_key,
|
||||
codex_linux_sandbox_exe,
|
||||
base_instructions,
|
||||
include_plan_tool,
|
||||
} = overrides;
|
||||
|
||||
let config_profile = match config_profile_key.as_ref().or(cfg.profile.as_ref()) {
|
||||
@@ -521,8 +526,8 @@ impl Config {
|
||||
.chatgpt_base_url
|
||||
.or(cfg.chatgpt_base_url)
|
||||
.unwrap_or("https://chatgpt.com/backend-api/".to_string()),
|
||||
|
||||
experimental_resume,
|
||||
include_plan_tool: include_plan_tool.unwrap_or(false),
|
||||
};
|
||||
Ok(config)
|
||||
}
|
||||
@@ -829,7 +834,7 @@ disable_response_storage = true
|
||||
///
|
||||
/// 1. custom command-line argument, e.g. `--model o3`
|
||||
/// 2. as part of a profile, where the `--profile` is specified via a CLI
|
||||
/// (or in the config file itelf)
|
||||
/// (or in the config file itself)
|
||||
/// 3. as an entry in `config.toml`, e.g. `model = "o3"`
|
||||
/// 4. the default value for a required field defined in code, e.g.,
|
||||
/// `crate::flags::OPENAI_DEFAULT_MODEL`
|
||||
@@ -879,6 +884,7 @@ disable_response_storage = true
|
||||
chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
|
||||
experimental_resume: None,
|
||||
base_instructions: None,
|
||||
include_plan_tool: false,
|
||||
},
|
||||
o3_profile_config
|
||||
);
|
||||
@@ -927,6 +933,7 @@ disable_response_storage = true
|
||||
chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
|
||||
experimental_resume: None,
|
||||
base_instructions: None,
|
||||
include_plan_tool: false,
|
||||
};
|
||||
|
||||
assert_eq!(expected_gpt3_profile_config, gpt3_profile_config);
|
||||
@@ -990,6 +997,7 @@ disable_response_storage = true
|
||||
chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
|
||||
experimental_resume: None,
|
||||
base_instructions: None,
|
||||
include_plan_tool: false,
|
||||
};
|
||||
|
||||
assert_eq!(expected_zdr_profile_config, zdr_profile_config);
|
||||
|
||||
@@ -34,6 +34,7 @@ mod models;
|
||||
pub mod openai_api_key;
|
||||
mod openai_model_info;
|
||||
mod openai_tools;
|
||||
pub mod plan_tool;
|
||||
mod project_doc;
|
||||
pub mod protocol;
|
||||
mod rollout;
|
||||
|
||||
@@ -4,13 +4,14 @@ use std::collections::BTreeMap;
|
||||
use std::sync::LazyLock;
|
||||
|
||||
use crate::client_common::Prompt;
|
||||
use crate::plan_tool::PLAN_TOOL;
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub(crate) struct ResponsesApiTool {
|
||||
name: &'static str,
|
||||
description: &'static str,
|
||||
strict: bool,
|
||||
parameters: JsonSchema,
|
||||
pub(crate) name: &'static str,
|
||||
pub(crate) description: &'static str,
|
||||
pub(crate) strict: bool,
|
||||
pub(crate) parameters: JsonSchema,
|
||||
}
|
||||
|
||||
/// When serialized as JSON, this produces a valid "Tool" in the OpenAI
|
||||
@@ -74,6 +75,7 @@ static DEFAULT_CODEX_MODEL_TOOLS: LazyLock<Vec<OpenAiTool>> =
|
||||
pub(crate) fn create_tools_json_for_responses_api(
|
||||
prompt: &Prompt,
|
||||
model: &str,
|
||||
include_plan_tool: bool,
|
||||
) -> crate::error::Result<Vec<serde_json::Value>> {
|
||||
// Assemble tool list: built-in tools + any extra tools from the prompt.
|
||||
let default_tools = if model.starts_with("codex") {
|
||||
@@ -93,6 +95,10 @@ pub(crate) fn create_tools_json_for_responses_api(
|
||||
.map(|(name, tool)| mcp_tool_to_openai_tool(name, tool)),
|
||||
);
|
||||
|
||||
if include_plan_tool {
|
||||
tools_json.push(serde_json::to_value(PLAN_TOOL.clone())?);
|
||||
}
|
||||
|
||||
Ok(tools_json)
|
||||
}
|
||||
|
||||
@@ -102,10 +108,12 @@ pub(crate) fn create_tools_json_for_responses_api(
|
||||
pub(crate) fn create_tools_json_for_chat_completions_api(
|
||||
prompt: &Prompt,
|
||||
model: &str,
|
||||
include_plan_tool: bool,
|
||||
) -> crate::error::Result<Vec<serde_json::Value>> {
|
||||
// We start with the JSON for the Responses API and than rewrite it to match
|
||||
// the chat completions tool call format.
|
||||
let responses_api_tools_json = create_tools_json_for_responses_api(prompt, model)?;
|
||||
let responses_api_tools_json =
|
||||
create_tools_json_for_responses_api(prompt, model, include_plan_tool)?;
|
||||
let tools_json = responses_api_tools_json
|
||||
.into_iter()
|
||||
.filter_map(|mut tool| {
|
||||
|
||||
126
codex-rs/core/src/plan_tool.rs
Normal file
126
codex-rs/core/src/plan_tool.rs
Normal file
@@ -0,0 +1,126 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::sync::LazyLock;
|
||||
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::codex::Session;
|
||||
use crate::models::FunctionCallOutputPayload;
|
||||
use crate::models::ResponseInputItem;
|
||||
use crate::openai_tools::JsonSchema;
|
||||
use crate::openai_tools::OpenAiTool;
|
||||
use crate::openai_tools::ResponsesApiTool;
|
||||
use crate::protocol::Event;
|
||||
use crate::protocol::EventMsg;
|
||||
|
||||
// Types for the TODO tool arguments matching codex-vscode/todo-mcp/src/main.rs
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum StepStatus {
|
||||
Pending,
|
||||
InProgress,
|
||||
Completed,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub struct PlanItemArg {
|
||||
pub step: String,
|
||||
pub status: StepStatus,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub struct UpdatePlanArgs {
|
||||
#[serde(default)]
|
||||
pub explanation: Option<String>,
|
||||
pub plan: Vec<PlanItemArg>,
|
||||
}
|
||||
|
||||
pub(crate) static PLAN_TOOL: LazyLock<OpenAiTool> = LazyLock::new(|| {
|
||||
let mut plan_item_props = BTreeMap::new();
|
||||
plan_item_props.insert("step".to_string(), JsonSchema::String);
|
||||
plan_item_props.insert("status".to_string(), JsonSchema::String);
|
||||
|
||||
let plan_items_schema = JsonSchema::Array {
|
||||
items: Box::new(JsonSchema::Object {
|
||||
properties: plan_item_props,
|
||||
required: &["step", "status"],
|
||||
additional_properties: false,
|
||||
}),
|
||||
};
|
||||
|
||||
let mut properties = BTreeMap::new();
|
||||
properties.insert("explanation".to_string(), JsonSchema::String);
|
||||
properties.insert("plan".to_string(), plan_items_schema);
|
||||
|
||||
OpenAiTool::Function(ResponsesApiTool {
|
||||
name: "update_plan",
|
||||
description: r#"Use the update_plan tool to keep the user updated on the current plan for the task.
|
||||
After understanding the user's task, call the update_plan tool with an initial plan. An example of a plan:
|
||||
1. Explore the codebase to find relevant files (status: in_progress)
|
||||
2. Implement the feature in the XYZ component (status: pending)
|
||||
3. Commit changes and make a pull request (status: pending)
|
||||
Each step should be a short, 1-sentence description.
|
||||
Until all the steps are finished, there should always be exactly one in_progress step in the plan.
|
||||
Call the update_plan tool whenever you finish a step, marking the completed step as `completed` and marking the next step as `in_progress`.
|
||||
Before running a command, consider whether or not you have completed the previous step, and make sure to mark it as completed before moving on to the next step.
|
||||
Sometimes, you may need to change plans in the middle of a task: call `update_plan` with the updated plan and make sure to provide an `explanation` of the rationale when doing so.
|
||||
When all steps are completed, call update_plan one last time with all steps marked as `completed`."#,
|
||||
strict: false,
|
||||
parameters: JsonSchema::Object {
|
||||
properties,
|
||||
required: &["plan"],
|
||||
additional_properties: false,
|
||||
},
|
||||
})
|
||||
});
|
||||
|
||||
/// This function doesn't do anything useful. However, it gives the model a structured way to record its plan that clients can read and render.
|
||||
/// So it's the _inputs_ to this function that are useful to clients, not the outputs and neither are actually useful for the model other
|
||||
/// than forcing it to come up and document a plan (TBD how that affects performance).
|
||||
pub(crate) async fn handle_update_plan(
|
||||
session: &Session,
|
||||
arguments: String,
|
||||
sub_id: String,
|
||||
call_id: String,
|
||||
) -> ResponseInputItem {
|
||||
match parse_update_plan_arguments(arguments, &call_id) {
|
||||
Ok(args) => {
|
||||
let output = ResponseInputItem::FunctionCallOutput {
|
||||
call_id,
|
||||
output: FunctionCallOutputPayload {
|
||||
content: "Plan updated".to_string(),
|
||||
success: Some(true),
|
||||
},
|
||||
};
|
||||
session
|
||||
.send_event(Event {
|
||||
id: sub_id.to_string(),
|
||||
msg: EventMsg::PlanUpdate(args),
|
||||
})
|
||||
.await;
|
||||
output
|
||||
}
|
||||
Err(output) => *output,
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_update_plan_arguments(
|
||||
arguments: String,
|
||||
call_id: &str,
|
||||
) -> Result<UpdatePlanArgs, Box<ResponseInputItem>> {
|
||||
match serde_json::from_str::<UpdatePlanArgs>(&arguments) {
|
||||
Ok(args) => Ok(args),
|
||||
Err(e) => {
|
||||
let output = ResponseInputItem::FunctionCallOutput {
|
||||
call_id: call_id.to_string(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: format!("failed to parse function arguments: {e}"),
|
||||
success: None,
|
||||
},
|
||||
};
|
||||
Err(Box::new(output))
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -19,6 +19,7 @@ use crate::config_types::ReasoningEffort as ReasoningEffortConfig;
|
||||
use crate::config_types::ReasoningSummary as ReasoningSummaryConfig;
|
||||
use crate::message_history::HistoryEntry;
|
||||
use crate::model_provider_info::ModelProviderInfo;
|
||||
use crate::plan_tool::UpdatePlanArgs;
|
||||
|
||||
/// Submission Queue Entry - requests from user
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
@@ -335,6 +336,8 @@ pub enum EventMsg {
|
||||
/// Response to GetHistoryEntryRequest.
|
||||
GetHistoryEntryResponse(GetHistoryEntryResponseEvent),
|
||||
|
||||
PlanUpdate(UpdatePlanArgs),
|
||||
|
||||
/// Notification that the agent is shutting down.
|
||||
ShutdownComplete,
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use codex_common::elapsed::format_elapsed;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::plan_tool::UpdatePlanArgs;
|
||||
use codex_core::protocol::AgentMessageDeltaEvent;
|
||||
use codex_core::protocol::AgentMessageEvent;
|
||||
use codex_core::protocol::AgentReasoningDeltaEvent;
|
||||
@@ -513,6 +514,11 @@ impl EventProcessor for EventProcessorWithHumanOutput {
|
||||
ts_println!(self, "model: {}", model);
|
||||
println!();
|
||||
}
|
||||
EventMsg::PlanUpdate(plan_update_event) => {
|
||||
let UpdatePlanArgs { explanation, plan } = plan_update_event;
|
||||
ts_println!(self, "explanation: {explanation:?}");
|
||||
ts_println!(self, "plan: {plan:?}");
|
||||
}
|
||||
EventMsg::GetHistoryEntryResponse(_) => {
|
||||
// Currently ignored in exec output.
|
||||
}
|
||||
|
||||
@@ -126,6 +126,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
|
||||
model_provider: None,
|
||||
codex_linux_sandbox_exe,
|
||||
base_instructions: None,
|
||||
include_plan_tool: None,
|
||||
};
|
||||
// Parse `-c` overrides.
|
||||
let cli_kv_overrides = match config_overrides.parse_overrides() {
|
||||
|
||||
@@ -50,6 +50,10 @@ pub struct CodexToolCallParam {
|
||||
/// The set of instructions to use instead of the default ones.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub base_instructions: Option<String>,
|
||||
|
||||
/// Whether to include the plan tool in the conversation.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub include_plan_tool: Option<bool>,
|
||||
}
|
||||
|
||||
/// Custom enum mirroring [`AskForApproval`], but has an extra dependency on
|
||||
@@ -140,9 +144,10 @@ impl CodexToolCallParam {
|
||||
sandbox,
|
||||
config: cli_overrides,
|
||||
base_instructions,
|
||||
include_plan_tool,
|
||||
} = self;
|
||||
|
||||
// Build the `ConfigOverrides` recognised by codex-core.
|
||||
// Build the `ConfigOverrides` recognized by codex-core.
|
||||
let overrides = codex_core::config::ConfigOverrides {
|
||||
model,
|
||||
config_profile: profile,
|
||||
@@ -152,6 +157,7 @@ impl CodexToolCallParam {
|
||||
model_provider: None,
|
||||
codex_linux_sandbox_exe,
|
||||
base_instructions,
|
||||
include_plan_tool,
|
||||
};
|
||||
|
||||
let cli_overrides = cli_overrides
|
||||
@@ -262,6 +268,10 @@ mod tests {
|
||||
"description": "Working directory for the session. If relative, it is resolved against the server process's current working directory.",
|
||||
"type": "string"
|
||||
},
|
||||
"include-plan-tool": {
|
||||
"description": "Whether to include the plan tool in the conversation.",
|
||||
"type": "boolean"
|
||||
},
|
||||
"model": {
|
||||
"description": "Optional override for the model name (e.g. \"o3\", \"o4-mini\").",
|
||||
"type": "string"
|
||||
|
||||
@@ -263,6 +263,7 @@ async fn run_codex_tool_session_inner(
|
||||
| EventMsg::PatchApplyBegin(_)
|
||||
| EventMsg::PatchApplyEnd(_)
|
||||
| EventMsg::GetHistoryEntryResponse(_)
|
||||
| EventMsg::PlanUpdate(_)
|
||||
| EventMsg::ShutdownComplete => {
|
||||
// For now, we do not do anything extra for these
|
||||
// events. Note that
|
||||
|
||||
@@ -81,6 +81,7 @@ async fn shell_command_interruption() -> anyhow::Result<()> {
|
||||
sandbox: None,
|
||||
config: None,
|
||||
base_instructions: None,
|
||||
include_plan_tool: None,
|
||||
})
|
||||
.await?;
|
||||
|
||||
|
||||
@@ -79,6 +79,7 @@ pub async fn run_main(
|
||||
config_profile: cli.config_profile.clone(),
|
||||
codex_linux_sandbox_exe,
|
||||
base_instructions: None,
|
||||
include_plan_tool: None,
|
||||
};
|
||||
// Parse `-c` overrides from the CLI.
|
||||
let cli_kv_overrides = match cli.config_overrides.parse_overrides() {
|
||||
|
||||
Reference in New Issue
Block a user