Add an elicitation for approve patch and refactor tool calls (#1642)

1. Added an elicitation for `approve-patch` which is very similar to
`approve-exec`.
2. Extracted both elicitations to their own files to prevent
`codex_tool_runner` from blowing up in size.
This commit is contained in:
Gabriel Peal
2025-07-21 23:58:41 -07:00
committed by GitHub
parent 6cf4b96f9d
commit 710f728124
8 changed files with 584 additions and 197 deletions

View File

@@ -139,14 +139,18 @@ impl McpProcess {
/// Returns the id used to make the request so it can be used when
/// correlating notifications.
pub async fn send_codex_tool_call(&mut self, prompt: &str) -> anyhow::Result<i64> {
pub async fn send_codex_tool_call(
&mut self,
cwd: Option<String>,
prompt: &str,
) -> anyhow::Result<i64> {
let codex_tool_call_params = CallToolRequestParams {
name: "codex".to_string(),
arguments: Some(serde_json::to_value(CodexToolCallParam {
cwd,
prompt: prompt.to_string(),
model: None,
profile: None,
cwd: None,
approval_policy: None,
sandbox: None,
config: None,

View File

@@ -4,5 +4,6 @@ mod responses;
pub use mcp_process::McpProcess;
pub use mock_model_server::create_mock_chat_completions_server;
pub use responses::create_apply_patch_sse_response;
pub use responses::create_final_assistant_message_sse_response;
pub use responses::create_shell_sse_response;

View File

@@ -57,3 +57,39 @@ pub fn create_final_assistant_message_sse_response(message: &str) -> anyhow::Res
);
Ok(sse)
}
pub fn create_apply_patch_sse_response(
patch_content: &str,
call_id: &str,
) -> anyhow::Result<String> {
// Use shell command to call apply_patch with heredoc format
let shell_command = format!("apply_patch <<'EOF'\n{patch_content}\nEOF");
let tool_call_arguments = serde_json::to_string(&json!({
"command": ["bash", "-lc", shell_command]
}))?;
let tool_call = json!({
"choices": [
{
"delta": {
"tool_calls": [
{
"id": call_id,
"function": {
"name": "shell",
"arguments": tool_call_arguments
}
}
]
},
"finish_reason": "tool_calls"
}
]
});
let sse = format!(
"data: {}\n\ndata: DONE\n\n",
serde_json::to_string(&tool_call)?
);
Ok(sse)
}

View File

@@ -1,11 +1,17 @@
mod common;
use std::collections::HashMap;
use std::env;
use std::path::Path;
use std::path::PathBuf;
use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
use codex_core::protocol::FileChange;
use codex_core::protocol::ReviewDecision;
use codex_mcp_server::ExecApprovalElicitRequestParams;
use codex_mcp_server::ExecApprovalResponse;
use codex_mcp_server::PatchApprovalElicitRequestParams;
use codex_mcp_server::PatchApprovalResponse;
use mcp_types::ElicitRequest;
use mcp_types::ElicitRequestParamsRequestedSchema;
use mcp_types::JSONRPC_VERSION;
@@ -17,8 +23,10 @@ use pretty_assertions::assert_eq;
use serde_json::json;
use tempfile::TempDir;
use tokio::time::timeout;
use wiremock::MockServer;
use crate::common::McpProcess;
use crate::common::create_apply_patch_sse_response;
use crate::common::create_final_assistant_message_sse_response;
use crate::common::create_mock_chat_completions_server;
use crate::common::create_shell_sse_response;
@@ -30,7 +38,7 @@ const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs
/// command, as expected.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn test_shell_command_approval_triggers_elicitation() {
if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
if env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
println!(
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
);
@@ -49,12 +57,11 @@ async fn shell_command_approval_triggers_elicitation() -> anyhow::Result<()> {
let shell_command = vec!["git".to_string(), "init".to_string()];
let workdir_for_shell_function_call = TempDir::new()?;
// Configure the mock server so it makes two responses:
// 1. The first response is a shell function call that will trigger an
// elicitation request.
// 2. The second response is the final assistant message that should be
// returned after the elicitation is approved and the command is run.
let server = create_mock_chat_completions_server(vec![
let McpHandle {
process: mut mcp_process,
server: _server,
dir: _dir,
} = create_mcp_process(vec![
create_shell_sse_response(
shell_command.clone(),
Some(workdir_for_shell_function_call.path()),
@@ -63,18 +70,14 @@ async fn shell_command_approval_triggers_elicitation() -> anyhow::Result<()> {
)?,
create_final_assistant_message_sse_response("Enjoy your new git repo!")?,
])
.await;
// Run `codex mcp` with a specific config.toml.
let codex_home = TempDir::new()?;
create_config_toml(codex_home.path(), server.uri())?;
let mut mcp_process = McpProcess::new(codex_home.path()).await?;
timeout(DEFAULT_READ_TIMEOUT, mcp_process.initialize()).await??;
.await?;
// Send a "codex" tool request, which should hit the completions endpoint.
// In turn, it should reply with a tool call, which the MCP should forward
// as an elicitation.
let codex_request_id = mcp_process.send_codex_tool_call("run `git init`").await?;
let codex_request_id = mcp_process
.send_codex_tool_call(None, "run `git init`")
.await?;
let elicitation_request = timeout(
DEFAULT_READ_TIMEOUT,
mcp_process.read_stream_until_request_message(),
@@ -136,32 +139,6 @@ async fn shell_command_approval_triggers_elicitation() -> anyhow::Result<()> {
Ok(())
}
/// Create a Codex config that uses the mock server as the model provider.
/// It also uses `approval_policy = "untrusted"` so that we exercise the
/// elicitation code path for shell commands.
fn create_config_toml(codex_home: &Path, server_uri: String) -> std::io::Result<()> {
let config_toml = codex_home.join("config.toml");
std::fs::write(
config_toml,
format!(
r#"
model = "mock-model"
approval_policy = "untrusted"
sandbox_policy = "read-only"
model_provider = "mock_provider"
[model_providers.mock_provider]
name = "Mock provider for test"
base_url = "{server_uri}/v1"
wire_api = "chat"
request_max_retries = 0
stream_max_retries = 0
"#
),
)
}
fn create_expected_elicitation_request(
elicitation_request_id: RequestId,
command: Vec<String>,
@@ -193,3 +170,197 @@ fn create_expected_elicitation_request(
})?),
})
}
/// Test that patch approval triggers an elicitation request to the MCP and that
/// sending the approval applies the patch, as expected.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn test_patch_approval_triggers_elicitation() {
if env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
println!(
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
);
return;
}
if let Err(err) = patch_approval_triggers_elicitation().await {
panic!("failure: {err}");
}
}
async fn patch_approval_triggers_elicitation() -> anyhow::Result<()> {
let cwd = TempDir::new()?;
let test_file = cwd.path().join("destination_file.txt");
std::fs::write(&test_file, "original content\n")?;
let patch_content = format!(
"*** Begin Patch\n*** Update File: {}\n-original content\n+modified content\n*** End Patch",
test_file.as_path().to_string_lossy()
);
let McpHandle {
process: mut mcp_process,
server: _server,
dir: _dir,
} = create_mcp_process(vec![
create_apply_patch_sse_response(&patch_content, "call1234")?,
create_final_assistant_message_sse_response("Patch has been applied successfully!")?,
])
.await?;
// Send a "codex" tool request that will trigger the apply_patch command
let codex_request_id = mcp_process
.send_codex_tool_call(
Some(cwd.path().to_string_lossy().to_string()),
"please modify the test file",
)
.await?;
let elicitation_request = timeout(
DEFAULT_READ_TIMEOUT,
mcp_process.read_stream_until_request_message(),
)
.await??;
let elicitation_request_id = RequestId::Integer(0);
let mut expected_changes = HashMap::new();
expected_changes.insert(
test_file.as_path().to_path_buf(),
FileChange::Update {
unified_diff: "@@ -1 +1 @@\n-original content\n+modified content\n".to_string(),
move_path: None,
},
);
let expected_elicitation_request = create_expected_patch_approval_elicitation_request(
elicitation_request_id.clone(),
expected_changes,
None, // No grant_root expected
None, // No reason expected
codex_request_id.to_string(),
"1".to_string(),
)?;
assert_eq!(expected_elicitation_request, elicitation_request);
// Accept the patch approval request by responding to the elicitation
mcp_process
.send_response(
elicitation_request_id,
serde_json::to_value(PatchApprovalResponse {
decision: ReviewDecision::Approved,
})?,
)
.await?;
// Verify the original `codex` tool call completes
let codex_response = timeout(
DEFAULT_READ_TIMEOUT,
mcp_process.read_stream_until_response_message(RequestId::Integer(codex_request_id)),
)
.await??;
assert_eq!(
JSONRPCResponse {
jsonrpc: JSONRPC_VERSION.into(),
id: RequestId::Integer(codex_request_id),
result: json!({
"content": [
{
"text": "Patch has been applied successfully!",
"type": "text"
}
]
}),
},
codex_response
);
let file_contents = std::fs::read_to_string(test_file.as_path())?;
assert_eq!(file_contents, "modified content\n");
Ok(())
}
fn create_expected_patch_approval_elicitation_request(
elicitation_request_id: RequestId,
changes: HashMap<PathBuf, FileChange>,
grant_root: Option<PathBuf>,
reason: Option<String>,
codex_mcp_tool_call_id: String,
codex_event_id: String,
) -> anyhow::Result<JSONRPCRequest> {
let mut message_lines = Vec::new();
if let Some(r) = &reason {
message_lines.push(r.clone());
}
message_lines.push("Allow Codex to apply proposed code changes?".to_string());
Ok(JSONRPCRequest {
jsonrpc: JSONRPC_VERSION.into(),
id: elicitation_request_id,
method: ElicitRequest::METHOD.to_string(),
params: Some(serde_json::to_value(&PatchApprovalElicitRequestParams {
message: message_lines.join("\n"),
requested_schema: ElicitRequestParamsRequestedSchema {
r#type: "object".to_string(),
properties: json!({}),
required: None,
},
codex_elicitation: "patch-approval".to_string(),
codex_mcp_tool_call_id,
codex_event_id,
codex_reason: reason,
codex_grant_root: grant_root,
codex_changes: changes,
})?),
})
}
/// This handle is used to ensure that the MockServer and TempDir are not dropped while
/// the McpProcess is still running.
pub struct McpHandle {
pub process: McpProcess,
/// Retain the server for the lifetime of the McpProcess.
#[allow(dead_code)]
server: MockServer,
/// Retain the temporary directory for the lifetime of the McpProcess.
#[allow(dead_code)]
dir: TempDir,
}
async fn create_mcp_process(responses: Vec<String>) -> anyhow::Result<McpHandle> {
let server = create_mock_chat_completions_server(responses).await;
let codex_home = TempDir::new()?;
create_config_toml(codex_home.path(), &server.uri())?;
let mut mcp_process = McpProcess::new(codex_home.path()).await?;
timeout(DEFAULT_READ_TIMEOUT, mcp_process.initialize()).await??;
Ok(McpHandle {
process: mcp_process,
server,
dir: codex_home,
})
}
/// Create a Codex config that uses the mock server as the model provider.
/// It also uses `approval_policy = "untrusted"` so that we exercise the
/// elicitation code path for shell commands.
fn create_config_toml(codex_home: &Path, server_uri: &str) -> std::io::Result<()> {
let config_toml = codex_home.join("config.toml");
std::fs::write(
config_toml,
format!(
r#"
model = "mock-model"
approval_policy = "untrusted"
sandbox_policy = "read-only"
model_provider = "mock_provider"
[model_providers.mock_provider]
name = "Mock provider for test"
base_url = "{server_uri}/v1"
wire_api = "chat"
request_max_retries = 0
stream_max_retries = 0
"#
),
)
}