codex-rs/mcp-server/tests/suite/codex_tool.rs

use std::collections::HashMap;
use std::env;
use std::path::Path;
use std::path::PathBuf;

use codex_core::parse_command;
use codex_core::protocol::FileChange;
use codex_core::protocol::ReviewDecision;
use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
use codex_mcp_server::CodexToolCallParam;
use codex_mcp_server::ExecApprovalElicitRequestParams;
use codex_mcp_server::ExecApprovalResponse;
use codex_mcp_server::PatchApprovalElicitRequestParams;
use codex_mcp_server::PatchApprovalResponse;
use mcp_types::ElicitRequest;
use mcp_types::ElicitRequestParamsRequestedSchema;
use mcp_types::JSONRPC_VERSION;
use mcp_types::JSONRPCRequest;
use mcp_types::JSONRPCResponse;
use mcp_types::ModelContextProtocolRequest;
use mcp_types::RequestId;
use pretty_assertions::assert_eq;
use serde_json::json;
use tempfile::TempDir;
use tokio::time::timeout;
use wiremock::MockServer;

use core_test_support::skip_if_no_network;
use mcp_test_support::McpProcess;
use mcp_test_support::create_apply_patch_sse_response;
use mcp_test_support::create_final_assistant_message_sse_response;
use mcp_test_support::create_mock_chat_completions_server;
use mcp_test_support::create_shell_sse_response;

// Allow ample time on slower CI or under load to avoid flakes.
const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(20);

/// Test that a shell command that is not on the "trusted" list triggers an
/// elicitation request to the MCP and that sending the approval runs the
/// command, as expected.
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
async fn test_shell_command_approval_triggers_elicitation() {
    if env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
        println!(
            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
        );
        return;
    }

    // Apparently `#[tokio::test]` must return `()`, so we create a helper
    // function that returns `Result` so we can use `?` in favor of `unwrap`.
    if let Err(err) = shell_command_approval_triggers_elicitation().await {
        panic!("failure: {err}");
    }
}

async fn shell_command_approval_triggers_elicitation() -> anyhow::Result<()> {
    // Use a simple, untrusted command that creates a file so we can
    // observe a side-effect.
    //
    // Cross‑platform approach: run a tiny Python snippet to touch the file
    // using `python3 -c ...` on all platforms.
    let workdir_for_shell_function_call = TempDir::new()?;
    let created_filename = "created_by_shell_tool.txt";
    let created_file = workdir_for_shell_function_call
        .path()
        .join(created_filename);

    let shell_command = vec![
        "python3".to_string(),
        "-c".to_string(),
        format!("import pathlib; pathlib.Path('{created_filename}').touch()"),
    ];

    let McpHandle {
        process: mut mcp_process,
        server: _server,
        dir: _dir,
    } = create_mcp_process(vec![
        create_shell_sse_response(
            shell_command.clone(),
            Some(workdir_for_shell_function_call.path()),
            Some(5_000),
            "call1234",
        )?,
        create_final_assistant_message_sse_response("File created!")?,
    ])
    .await?;

    // Send a "codex" tool request, which should hit the completions endpoint.
    // In turn, it should reply with a tool call, which the MCP should forward
    // as an elicitation.
    let codex_request_id = mcp_process
        .send_codex_tool_call(CodexToolCallParam {
            prompt: "run `git init`".to_string(),
            ..Default::default()
        })
        .await?;
    let elicitation_request = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp_process.read_stream_until_request_message(),
    )
    .await??;

    let elicitation_request_id = elicitation_request.id.clone();
    let params = serde_json::from_value::<ExecApprovalElicitRequestParams>(
        elicitation_request
            .params
            .clone()
            .ok_or_else(|| anyhow::anyhow!("elicitation_request.params must be set"))?,
    )?;
    let expected_elicitation_request = create_expected_elicitation_request(
        elicitation_request_id.clone(),
        shell_command.clone(),
        workdir_for_shell_function_call.path(),
        codex_request_id.to_string(),
        params.codex_event_id.clone(),
    )?;
    assert_eq!(expected_elicitation_request, elicitation_request);

    // Accept the `git init` request by responding to the elicitation.
    mcp_process
        .send_response(
            elicitation_request_id,
            serde_json::to_value(ExecApprovalResponse {
                decision: ReviewDecision::Approved,
            })?,
        )
        .await?;

    // Verify task_complete notification arrives before the tool call completes.
    #[expect(clippy::expect_used)]
    let _task_complete = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp_process.read_stream_until_legacy_task_complete_notification(),
    )
    .await
    .expect("task_complete_notification timeout")
    .expect("task_complete_notification resp");

    // Verify the original `codex` tool call completes and that the file was created.
    let codex_response = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp_process.read_stream_until_response_message(RequestId::Integer(codex_request_id)),
    )
    .await??;
    assert_eq!(
        JSONRPCResponse {
            jsonrpc: JSONRPC_VERSION.into(),
            id: RequestId::Integer(codex_request_id),
            result: json!({
                "content": [
                    {
                        "text": "File created!",
                        "type": "text"
                    }
                ]
            }),
        },
        codex_response
    );

    assert!(created_file.is_file(), "created file should exist");

    Ok(())
}

fn create_expected_elicitation_request(
    elicitation_request_id: RequestId,
    command: Vec<String>,
    workdir: &Path,
    codex_mcp_tool_call_id: String,
    codex_event_id: String,
) -> anyhow::Result<JSONRPCRequest> {
    let expected_message = format!(
        "Allow Codex to run `{}` in `{}`?",
        shlex::try_join(command.iter().map(std::convert::AsRef::as_ref))?,
        workdir.to_string_lossy()
    );
    let codex_parsed_cmd = parse_command::parse_command(&command);
    Ok(JSONRPCRequest {
        jsonrpc: JSONRPC_VERSION.into(),
        id: elicitation_request_id,
        method: ElicitRequest::METHOD.to_string(),
        params: Some(serde_json::to_value(&ExecApprovalElicitRequestParams {
            message: expected_message,
            requested_schema: ElicitRequestParamsRequestedSchema {
                r#type: "object".to_string(),
                properties: json!({}),
                required: None,
            },
            codex_elicitation: "exec-approval".to_string(),
            codex_mcp_tool_call_id,
            codex_event_id,
            codex_command: command,
            codex_cwd: workdir.to_path_buf(),
            codex_call_id: "call1234".to_string(),
            codex_parsed_cmd,
            codex_risk: None,
        })?),
    })
}

/// Test that patch approval triggers an elicitation request to the MCP and that
/// sending the approval applies the patch, as expected.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn test_patch_approval_triggers_elicitation() {
    if env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
        println!(
            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
        );
        return;
    }

    if let Err(err) = patch_approval_triggers_elicitation().await {
        panic!("failure: {err}");
    }
}

async fn patch_approval_triggers_elicitation() -> anyhow::Result<()> {
    let cwd = TempDir::new()?;
    let test_file = cwd.path().join("destination_file.txt");
    std::fs::write(&test_file, "original content\n")?;

    let patch_content = format!(
        "*** Begin Patch\n*** Update File: {}\n-original content\n+modified content\n*** End Patch",
        test_file.as_path().to_string_lossy()
    );

    let McpHandle {
        process: mut mcp_process,
        server: _server,
        dir: _dir,
    } = create_mcp_process(vec![
        create_apply_patch_sse_response(&patch_content, "call1234")?,
        create_final_assistant_message_sse_response("Patch has been applied successfully!")?,
    ])
    .await?;

    // Send a "codex" tool request that will trigger the apply_patch command
    let codex_request_id = mcp_process
        .send_codex_tool_call(CodexToolCallParam {
            cwd: Some(cwd.path().to_string_lossy().to_string()),
            prompt: "please modify the test file".to_string(),
            ..Default::default()
        })
        .await?;
    let elicitation_request = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp_process.read_stream_until_request_message(),
    )
    .await??;

    let elicitation_request_id = RequestId::Integer(0);

    let mut expected_changes = HashMap::new();
    expected_changes.insert(
        test_file.as_path().to_path_buf(),
        FileChange::Update {
            unified_diff: "@@ -1 +1 @@\n-original content\n+modified content\n".to_string(),
            move_path: None,
        },
    );

    let expected_elicitation_request = create_expected_patch_approval_elicitation_request(
        elicitation_request_id.clone(),
        expected_changes,
        None, // No grant_root expected
        None, // No reason expected
        codex_request_id.to_string(),
        "1".to_string(),
    )?;
    assert_eq!(expected_elicitation_request, elicitation_request);

    // Accept the patch approval request by responding to the elicitation
    mcp_process
        .send_response(
            elicitation_request_id,
            serde_json::to_value(PatchApprovalResponse {
                decision: ReviewDecision::Approved,
            })?,
        )
        .await?;

    // Verify the original `codex` tool call completes
    let codex_response = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp_process.read_stream_until_response_message(RequestId::Integer(codex_request_id)),
    )
    .await??;
    assert_eq!(
        JSONRPCResponse {
            jsonrpc: JSONRPC_VERSION.into(),
            id: RequestId::Integer(codex_request_id),
            result: json!({
                "content": [
                    {
                        "text": "Patch has been applied successfully!",
                        "type": "text"
                    }
                ]
            }),
        },
        codex_response
    );

    let file_contents = std::fs::read_to_string(test_file.as_path())?;
    assert_eq!(file_contents, "modified content\n");

    Ok(())
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn test_codex_tool_passes_base_instructions() {
    skip_if_no_network!();

    // Apparently `#[tokio::test]` must return `()`, so we create a helper
    // function that returns `Result` so we can use `?` in favor of `unwrap`.
    if let Err(err) = codex_tool_passes_base_instructions().await {
        panic!("failure: {err}");
    }
}

async fn codex_tool_passes_base_instructions() -> anyhow::Result<()> {
    #![expect(clippy::unwrap_used)]

    let server =
        create_mock_chat_completions_server(vec![create_final_assistant_message_sse_response(
            "Enjoy!",
        )?])
        .await;

    // Run `codex mcp` with a specific config.toml.
    let codex_home = TempDir::new()?;
    create_config_toml(codex_home.path(), &server.uri())?;
    let mut mcp_process = McpProcess::new(codex_home.path()).await?;
    timeout(DEFAULT_READ_TIMEOUT, mcp_process.initialize()).await??;

    // Send a "codex" tool request, which should hit the completions endpoint.
    let codex_request_id = mcp_process
        .send_codex_tool_call(CodexToolCallParam {
            prompt: "How are you?".to_string(),
            base_instructions: Some("You are a helpful assistant.".to_string()),
            developer_instructions: Some("Foreshadow upcoming tool calls.".to_string()),
            ..Default::default()
        })
        .await?;

    let codex_response = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp_process.read_stream_until_response_message(RequestId::Integer(codex_request_id)),
    )
    .await??;
    assert_eq!(
        JSONRPCResponse {
            jsonrpc: JSONRPC_VERSION.into(),
            id: RequestId::Integer(codex_request_id),
            result: json!({
                "content": [
                    {
                        "text": "Enjoy!",
                        "type": "text"
                    }
                ]
            }),
        },
        codex_response
    );

    let requests = server.received_requests().await.unwrap();
    let request = requests[0].body_json::<serde_json::Value>()?;
    let instructions = request["messages"][0]["content"].as_str().unwrap();
    assert!(instructions.starts_with("You are a helpful assistant."));

    let developer_msg = request["messages"]
        .as_array()
        .and_then(|messages| {
            messages
                .iter()
                .find(|msg| msg.get("role").and_then(|role| role.as_str()) == Some("developer"))
        })
        .unwrap();
    let developer_content = developer_msg
        .get("content")
        .and_then(|value| value.as_str())
        .unwrap();
    assert!(
        !developer_content.contains('<'),
        "expected developer instructions without XML tags, got `{developer_content}`"
    );
    assert_eq!(developer_content, "Foreshadow upcoming tool calls.");

    Ok(())
}

fn create_expected_patch_approval_elicitation_request(
    elicitation_request_id: RequestId,
    changes: HashMap<PathBuf, FileChange>,
    grant_root: Option<PathBuf>,
    reason: Option<String>,
    codex_mcp_tool_call_id: String,
    codex_event_id: String,
) -> anyhow::Result<JSONRPCRequest> {
    let mut message_lines = Vec::new();
    if let Some(r) = &reason {
        message_lines.push(r.clone());
    }
    message_lines.push("Allow Codex to apply proposed code changes?".to_string());

    Ok(JSONRPCRequest {
        jsonrpc: JSONRPC_VERSION.into(),
        id: elicitation_request_id,
        method: ElicitRequest::METHOD.to_string(),
        params: Some(serde_json::to_value(&PatchApprovalElicitRequestParams {
            message: message_lines.join("\n"),
            requested_schema: ElicitRequestParamsRequestedSchema {
                r#type: "object".to_string(),
                properties: json!({}),
                required: None,
            },
            codex_elicitation: "patch-approval".to_string(),
            codex_mcp_tool_call_id,
            codex_event_id,
            codex_reason: reason,
            codex_grant_root: grant_root,
            codex_changes: changes,
            codex_call_id: "call1234".to_string(),
        })?),
    })
}

/// This handle is used to ensure that the MockServer and TempDir are not dropped while
/// the McpProcess is still running.
pub struct McpHandle {
    pub process: McpProcess,
    /// Retain the server for the lifetime of the McpProcess.
    #[allow(dead_code)]
    server: MockServer,
    /// Retain the temporary directory for the lifetime of the McpProcess.
    #[allow(dead_code)]
    dir: TempDir,
}

async fn create_mcp_process(responses: Vec<String>) -> anyhow::Result<McpHandle> {
    let server = create_mock_chat_completions_server(responses).await;
    let codex_home = TempDir::new()?;
    create_config_toml(codex_home.path(), &server.uri())?;
    let mut mcp_process = McpProcess::new(codex_home.path()).await?;
    timeout(DEFAULT_READ_TIMEOUT, mcp_process.initialize()).await??;
    Ok(McpHandle {
        process: mcp_process,
        server,
        dir: codex_home,
    })
}

/// Create a Codex config that uses the mock server as the model provider.
/// It also uses `approval_policy = "untrusted"` so that we exercise the
/// elicitation code path for shell commands.
fn create_config_toml(codex_home: &Path, server_uri: &str) -> std::io::Result<()> {
    let config_toml = codex_home.join("config.toml");
    std::fs::write(
        config_toml,
        format!(
            r#"
model = "mock-model"
approval_policy = "untrusted"
sandbox_policy = "workspace-write"

model_provider = "mock_provider"

[model_providers.mock_provider]
name = "Mock provider for test"
base_url = "{server_uri}/v1"
wire_api = "chat"
request_max_retries = 0
stream_max_retries = 0
"#
        ),
    )
}
-												Add an elicitation for approve patch and refactor tool calls (#1642)

1. Added an elicitation for `approve-patch` which is very similar to
`approve-exec`.
2. Extracted both elicitations to their own files to prevent
`codex_tool_runner` from blowing up in size.
											
										
										
											2025-07-21 23:58:41 -07:00
+								use std::collections::HashMap;
 								use std::env;
-												test: add integration test for MCP server (#1633)

This PR introduces a single integration test for `cargo mcp`, though it
also introduces a number of reusable components so that it should be
easier to introduce more integration tests going forward.

The new test is introduced in `codex-rs/mcp-server/tests/elicitation.rs`
and the reusable pieces are in `codex-rs/mcp-server/tests/common`.

The test itself verifies new functionality around elicitations
introduced in https://github.com/openai/codex/pull/1623 (and the fix
introduced in https://github.com/openai/codex/pull/1629) by doing the
following:

- starts a mock model provider with canned responses for
`/v1/chat/completions`
- starts the MCP server with a `config.toml` to use that model provider
(and `approval_policy = "untrusted"`)
- sends the `codex` tool call which causes the mock model provider to
request a shell call for `git init`
- the MCP server sends an elicitation to the client to approve the
request
- the client replies to the elicitation with `"approved"`
- the MCP server runs the command and re-samples the model, getting a
`"finish_reason": "stop"`
- in turn, the MCP server sends the final response to the original
`codex` tool call
- verifies that `git init` ran as expected

To test:

```
cargo test shell_command_approval_triggers_elicitation
```

In writing this test, I discovered that `ExecApprovalResponse` does not
conform to `ElicitResult`, so I added a TODO to fix that, since I think
that should be updated in a separate PR. As it stands, this PR does not
update any business logic, though it does make a number of members of
the `mcp-server` crate `pub` so they can be used in the test.

One additional learning from this PR is that
`std::process::Command::cargo_bin()` from the `assert_cmd` trait is only
available for `std::process::Command`, but we really want to use
`tokio::process::Command` so that everything is async and we can
leverage utilities like `tokio::time::timeout()`. The trick I came up
with was to use `cargo_bin()` to locate the program, and then to use
`std::process::Command::get_program()` when constructing the
`tokio::process::Command`.
											
										
										
											2025-07-21 10:27:07 -07:00
+								use std::path::Path;
-												Add an elicitation for approve patch and refactor tool calls (#1642)

1. Added an elicitation for `approve-patch` which is very similar to
`approve-exec`.
2. Extracted both elicitations to their own files to prevent
`codex_tool_runner` from blowing up in size.
											
										
										
											2025-07-21 23:58:41 -07:00
+								use std::path::PathBuf;
-												test: add integration test for MCP server (#1633)

This PR introduces a single integration test for `cargo mcp`, though it
also introduces a number of reusable components so that it should be
easier to introduce more integration tests going forward.

The new test is introduced in `codex-rs/mcp-server/tests/elicitation.rs`
and the reusable pieces are in `codex-rs/mcp-server/tests/common`.

The test itself verifies new functionality around elicitations
introduced in https://github.com/openai/codex/pull/1623 (and the fix
introduced in https://github.com/openai/codex/pull/1629) by doing the
following:

- starts a mock model provider with canned responses for
`/v1/chat/completions`
- starts the MCP server with a `config.toml` to use that model provider
(and `approval_policy = "untrusted"`)
- sends the `codex` tool call which causes the mock model provider to
request a shell call for `git init`
- the MCP server sends an elicitation to the client to approve the
request
- the client replies to the elicitation with `"approved"`
- the MCP server runs the command and re-samples the model, getting a
`"finish_reason": "stop"`
- in turn, the MCP server sends the final response to the original
`codex` tool call
- verifies that `git init` ran as expected

To test:

```
cargo test shell_command_approval_triggers_elicitation
```

In writing this test, I discovered that `ExecApprovalResponse` does not
conform to `ElicitResult`, so I added a TODO to fix that, since I think
that should be updated in a separate PR. As it stands, this PR does not
update any business logic, though it does make a number of members of
the `mcp-server` crate `pub` so they can be used in the test.

One additional learning from this PR is that
`std::process::Command::cargo_bin()` from the `assert_cmd` trait is only
available for `std::process::Command`, but we really want to use
`tokio::process::Command` so that everything is async and we can
leverage utilities like `tokio::time::timeout()`. The trick I came up
with was to use `cargo_bin()` to locate the program, and then to use
`std::process::Command::get_program()` when constructing the
`tokio::process::Command`.
											
										
										
											2025-07-21 10:27:07 -07:00
-												feat: add Vec<ParsedCommand> to ExecApprovalRequestEvent (#5222)

This adds `parsed_cmd: Vec<ParsedCommand>` to `ExecApprovalRequestEvent`
in the core protocol (`protocol/src/protocol.rs`), which is also what
this field is named on `ExecCommandBeginEvent`. Honestly, I don't love
the name (it sounds like a single command, but it is actually a list of
them), but I don't want to get distracted by a naming discussion right
now.

This also adds `parsed_cmd` to `ExecCommandApprovalParams` in
`codex-rs/app-server-protocol/src/protocol.rs`, so it will be available
via `codex app-server`, as well.

For consistency, I also updated `ExecApprovalElicitRequestParams` in
`codex-rs/mcp-server/src/exec_approval.rs` to include this field under
the name `codex_parsed_cmd`, as that struct already has a number of
special `codex_*` fields. Note this is the code for when Codex is used
as an MCP _server_ and therefore has to conform to the official spec for
an MCP elicitation type.
											
										
										
											2025-10-15 13:58:40 -07:00
+								use codex_core::parse_command;
-												Add an elicitation for approve patch and refactor tool calls (#1642)

1. Added an elicitation for `approve-patch` which is very similar to
`approve-exec`.
2. Extracted both elicitations to their own files to prevent
`codex_tool_runner` from blowing up in size.
											
										
										
											2025-07-21 23:58:41 -07:00
+								use codex_core::protocol::FileChange;
-												test: add integration test for MCP server (#1633)

This PR introduces a single integration test for `cargo mcp`, though it
also introduces a number of reusable components so that it should be
easier to introduce more integration tests going forward.

The new test is introduced in `codex-rs/mcp-server/tests/elicitation.rs`
and the reusable pieces are in `codex-rs/mcp-server/tests/common`.

The test itself verifies new functionality around elicitations
introduced in https://github.com/openai/codex/pull/1623 (and the fix
introduced in https://github.com/openai/codex/pull/1629) by doing the
following:

- starts a mock model provider with canned responses for
`/v1/chat/completions`
- starts the MCP server with a `config.toml` to use that model provider
(and `approval_policy = "untrusted"`)
- sends the `codex` tool call which causes the mock model provider to
request a shell call for `git init`
- the MCP server sends an elicitation to the client to approve the
request
- the client replies to the elicitation with `"approved"`
- the MCP server runs the command and re-samples the model, getting a
`"finish_reason": "stop"`
- in turn, the MCP server sends the final response to the original
`codex` tool call
- verifies that `git init` ran as expected

To test:

```
cargo test shell_command_approval_triggers_elicitation
```

In writing this test, I discovered that `ExecApprovalResponse` does not
conform to `ElicitResult`, so I added a TODO to fix that, since I think
that should be updated in a separate PR. As it stands, this PR does not
update any business logic, though it does make a number of members of
the `mcp-server` crate `pub` so they can be used in the test.

One additional learning from this PR is that
`std::process::Command::cargo_bin()` from the `assert_cmd` trait is only
available for `std::process::Command`, but we really want to use
`tokio::process::Command` so that everything is async and we can
leverage utilities like `tokio::time::timeout()`. The trick I came up
with was to use `cargo_bin()` to locate the program, and then to use
`std::process::Command::get_program()` when constructing the
`tokio::process::Command`.
											
										
										
											2025-07-21 10:27:07 -07:00
+								use codex_core::protocol::ReviewDecision;
-												chore: refactor exec.rs: create separate seatbelt.rs and spawn.rs files (#1762)

At 550 lines, `exec.rs` was a bit large. In particular, I found it hard
to locate the Seatbelt-related code quickly without a file with
`seatbelt` in the name, so this refactors things so:

- `spawn_command_under_seatbelt()` and dependent code moves to a new
`seatbelt.rs` file
- `spawn_child_async()` and dependent code moves to a new `spawn.rs`
file
											
										
										
											2025-07-31 13:11:47 -07:00
+								use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
-												Add support for custom base instructions (#1645)

Allows providing custom instructions file as a config parameter and
custom instruction text via MCP tool call.
											
										
										
											2025-07-22 09:42:22 -07:00
+								use codex_mcp_server::CodexToolCallParam;
-												test: add integration test for MCP server (#1633)

This PR introduces a single integration test for `cargo mcp`, though it
also introduces a number of reusable components so that it should be
easier to introduce more integration tests going forward.

The new test is introduced in `codex-rs/mcp-server/tests/elicitation.rs`
and the reusable pieces are in `codex-rs/mcp-server/tests/common`.

The test itself verifies new functionality around elicitations
introduced in https://github.com/openai/codex/pull/1623 (and the fix
introduced in https://github.com/openai/codex/pull/1629) by doing the
following:

- starts a mock model provider with canned responses for
`/v1/chat/completions`
- starts the MCP server with a `config.toml` to use that model provider
(and `approval_policy = "untrusted"`)
- sends the `codex` tool call which causes the mock model provider to
request a shell call for `git init`
- the MCP server sends an elicitation to the client to approve the
request
- the client replies to the elicitation with `"approved"`
- the MCP server runs the command and re-samples the model, getting a
`"finish_reason": "stop"`
- in turn, the MCP server sends the final response to the original
`codex` tool call
- verifies that `git init` ran as expected

To test:

```
cargo test shell_command_approval_triggers_elicitation
```

In writing this test, I discovered that `ExecApprovalResponse` does not
conform to `ElicitResult`, so I added a TODO to fix that, since I think
that should be updated in a separate PR. As it stands, this PR does not
update any business logic, though it does make a number of members of
the `mcp-server` crate `pub` so they can be used in the test.

One additional learning from this PR is that
`std::process::Command::cargo_bin()` from the `assert_cmd` trait is only
available for `std::process::Command`, but we really want to use
`tokio::process::Command` so that everything is async and we can
leverage utilities like `tokio::time::timeout()`. The trick I came up
with was to use `cargo_bin()` to locate the program, and then to use
`std::process::Command::get_program()` when constructing the
`tokio::process::Command`.
											
										
										
											2025-07-21 10:27:07 -07:00
+								use codex_mcp_server::ExecApprovalElicitRequestParams;
 								use codex_mcp_server::ExecApprovalResponse;
-												Add an elicitation for approve patch and refactor tool calls (#1642)

1. Added an elicitation for `approve-patch` which is very similar to
`approve-exec`.
2. Extracted both elicitations to their own files to prevent
`codex_tool_runner` from blowing up in size.
											
										
										
											2025-07-21 23:58:41 -07:00
+								use codex_mcp_server::PatchApprovalElicitRequestParams;
 								use codex_mcp_server::PatchApprovalResponse;
-												test: add integration test for MCP server (#1633)

This PR introduces a single integration test for `cargo mcp`, though it
also introduces a number of reusable components so that it should be
easier to introduce more integration tests going forward.

The new test is introduced in `codex-rs/mcp-server/tests/elicitation.rs`
and the reusable pieces are in `codex-rs/mcp-server/tests/common`.

The test itself verifies new functionality around elicitations
introduced in https://github.com/openai/codex/pull/1623 (and the fix
introduced in https://github.com/openai/codex/pull/1629) by doing the
following:

- starts a mock model provider with canned responses for
`/v1/chat/completions`
- starts the MCP server with a `config.toml` to use that model provider
(and `approval_policy = "untrusted"`)
- sends the `codex` tool call which causes the mock model provider to
request a shell call for `git init`
- the MCP server sends an elicitation to the client to approve the
request
- the client replies to the elicitation with `"approved"`
- the MCP server runs the command and re-samples the model, getting a
`"finish_reason": "stop"`
- in turn, the MCP server sends the final response to the original
`codex` tool call
- verifies that `git init` ran as expected

To test:

```
cargo test shell_command_approval_triggers_elicitation
```

In writing this test, I discovered that `ExecApprovalResponse` does not
conform to `ElicitResult`, so I added a TODO to fix that, since I think
that should be updated in a separate PR. As it stands, this PR does not
update any business logic, though it does make a number of members of
the `mcp-server` crate `pub` so they can be used in the test.

One additional learning from this PR is that
`std::process::Command::cargo_bin()` from the `assert_cmd` trait is only
available for `std::process::Command`, but we really want to use
`tokio::process::Command` so that everything is async and we can
leverage utilities like `tokio::time::timeout()`. The trick I came up
with was to use `cargo_bin()` to locate the program, and then to use
`std::process::Command::get_program()` when constructing the
`tokio::process::Command`.
											
										
										
											2025-07-21 10:27:07 -07:00
+								use mcp_types::ElicitRequest;
 								use mcp_types::ElicitRequestParamsRequestedSchema;
 								use mcp_types::JSONRPC_VERSION;
 								use mcp_types::JSONRPCRequest;
 								use mcp_types::JSONRPCResponse;
 								use mcp_types::ModelContextProtocolRequest;
 								use mcp_types::RequestId;
 								use pretty_assertions::assert_eq;
 								use serde_json::json;
 								use tempfile::TempDir;
 								use tokio::time::timeout;
-												Add an elicitation for approve patch and refactor tool calls (#1642)

1. Added an elicitation for `approve-patch` which is very similar to
`approve-exec`.
2. Extracted both elicitations to their own files to prevent
`codex_tool_runner` from blowing up in size.
											
										
										
											2025-07-21 23:58:41 -07:00
+								use wiremock::MockServer;
-												test: add integration test for MCP server (#1633)

This PR introduces a single integration test for `cargo mcp`, though it
also introduces a number of reusable components so that it should be
easier to introduce more integration tests going forward.

The new test is introduced in `codex-rs/mcp-server/tests/elicitation.rs`
and the reusable pieces are in `codex-rs/mcp-server/tests/common`.

The test itself verifies new functionality around elicitations
introduced in https://github.com/openai/codex/pull/1623 (and the fix
introduced in https://github.com/openai/codex/pull/1629) by doing the
following:

- starts a mock model provider with canned responses for
`/v1/chat/completions`
- starts the MCP server with a `config.toml` to use that model provider
(and `approval_policy = "untrusted"`)
- sends the `codex` tool call which causes the mock model provider to
request a shell call for `git init`
- the MCP server sends an elicitation to the client to approve the
request
- the client replies to the elicitation with `"approved"`
- the MCP server runs the command and re-samples the model, getting a
`"finish_reason": "stop"`
- in turn, the MCP server sends the final response to the original
`codex` tool call
- verifies that `git init` ran as expected

To test:

```
cargo test shell_command_approval_triggers_elicitation
```

In writing this test, I discovered that `ExecApprovalResponse` does not
conform to `ElicitResult`, so I added a TODO to fix that, since I think
that should be updated in a separate PR. As it stands, this PR does not
update any business logic, though it does make a number of members of
the `mcp-server` crate `pub` so they can be used in the test.

One additional learning from this PR is that
`std::process::Command::cargo_bin()` from the `assert_cmd` trait is only
available for `std::process::Command`, but we really want to use
`tokio::process::Command` so that everything is async and we can
leverage utilities like `tokio::time::timeout()`. The trick I came up
with was to use `cargo_bin()` to locate the program, and then to use
`std::process::Command::get_program()` when constructing the
`tokio::process::Command`.
											
										
										
											2025-07-21 10:27:07 -07:00
-												make tests pass cleanly in sandbox (#4067)

This changes the reqwest client used in tests to be sandbox-friendly,
and skips a bunch of other tests that don't work inside the
sandbox/without network.
											
										
										
											2025-09-25 13:11:14 -07:00
+								use core_test_support::skip_if_no_network;
-												fix: create separate test_support crates to eliminate #[allow(dead_code)] (#1667)

Because of a quirk of how implementation tests work in Rust, we had a
number of `#[allow(dead_code)]` annotations that were misleading because
the functions _were_ being used, just not by all integration tests in a
`tests/` folder, so when compiling the test that did not use the
function, clippy would complain that it was unused.

This fixes things by create a "test_support" crate under the `tests/`
folder that is imported as a dev dependency for the respective crate.
											
										
										
											2025-07-24 12:19:46 -07:00
+								use mcp_test_support::McpProcess;
 								use mcp_test_support::create_apply_patch_sse_response;
 								use mcp_test_support::create_final_assistant_message_sse_response;
 								use mcp_test_support::create_mock_chat_completions_server;
 								use mcp_test_support::create_shell_sse_response;
-												test: add integration test for MCP server (#1633)

This PR introduces a single integration test for `cargo mcp`, though it
also introduces a number of reusable components so that it should be
easier to introduce more integration tests going forward.

The new test is introduced in `codex-rs/mcp-server/tests/elicitation.rs`
and the reusable pieces are in `codex-rs/mcp-server/tests/common`.

The test itself verifies new functionality around elicitations
introduced in https://github.com/openai/codex/pull/1623 (and the fix
introduced in https://github.com/openai/codex/pull/1629) by doing the
following:

- starts a mock model provider with canned responses for
`/v1/chat/completions`
- starts the MCP server with a `config.toml` to use that model provider
(and `approval_policy = "untrusted"`)
- sends the `codex` tool call which causes the mock model provider to
request a shell call for `git init`
- the MCP server sends an elicitation to the client to approve the
request
- the client replies to the elicitation with `"approved"`
- the MCP server runs the command and re-samples the model, getting a
`"finish_reason": "stop"`
- in turn, the MCP server sends the final response to the original
`codex` tool call
- verifies that `git init` ran as expected

To test:

```
cargo test shell_command_approval_triggers_elicitation
```

In writing this test, I discovered that `ExecApprovalResponse` does not
conform to `ElicitResult`, so I added a TODO to fix that, since I think
that should be updated in a separate PR. As it stands, this PR does not
update any business logic, though it does make a number of members of
the `mcp-server` crate `pub` so they can be used in the test.

One additional learning from this PR is that
`std::process::Command::cargo_bin()` from the `assert_cmd` trait is only
available for `std::process::Command`, but we really want to use
`tokio::process::Command` so that everything is async and we can
leverage utilities like `tokio::time::timeout()`. The trick I came up
with was to use `cargo_bin()` to locate the program, and then to use
`std::process::Command::get_program()` when constructing the
`tokio::process::Command`.
											
										
										
											2025-07-21 10:27:07 -07:00
-												chore: try to make it easier to debug the flakiness of test_shell_command_approval_triggers_elicitation (#2848)

`test_shell_command_approval_triggers_elicitation()` is one of a number
of integration tests that we have observed to be flaky on GitHub CI, so
this PR tries to reduce the flakiness _and_ to provide us with more
information when it flakes. Specifically:

- Changed the command that we use to trigger the elicitation from `git
init` to `python3 -c 'import pathlib; pathlib.Path(r"{}").touch()'`
because running `git` seems more likely to invite variance.
- Increased the timeout to wait for the task response from 10s to 20s.
- Added more logging.
											
										
										
											2025-08-28 12:33:33 -07:00
+								// Allow ample time on slower CI or under load to avoid flakes.
 								const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(20);
-												test: add integration test for MCP server (#1633)

This PR introduces a single integration test for `cargo mcp`, though it
also introduces a number of reusable components so that it should be
easier to introduce more integration tests going forward.

The new test is introduced in `codex-rs/mcp-server/tests/elicitation.rs`
and the reusable pieces are in `codex-rs/mcp-server/tests/common`.

The test itself verifies new functionality around elicitations
introduced in https://github.com/openai/codex/pull/1623 (and the fix
introduced in https://github.com/openai/codex/pull/1629) by doing the
following:

- starts a mock model provider with canned responses for
`/v1/chat/completions`
- starts the MCP server with a `config.toml` to use that model provider
(and `approval_policy = "untrusted"`)
- sends the `codex` tool call which causes the mock model provider to
request a shell call for `git init`
- the MCP server sends an elicitation to the client to approve the
request
- the client replies to the elicitation with `"approved"`
- the MCP server runs the command and re-samples the model, getting a
`"finish_reason": "stop"`
- in turn, the MCP server sends the final response to the original
`codex` tool call
- verifies that `git init` ran as expected

To test:

```
cargo test shell_command_approval_triggers_elicitation
```

In writing this test, I discovered that `ExecApprovalResponse` does not
conform to `ElicitResult`, so I added a TODO to fix that, since I think
that should be updated in a separate PR. As it stands, this PR does not
update any business logic, though it does make a number of members of
the `mcp-server` crate `pub` so they can be used in the test.

One additional learning from this PR is that
`std::process::Command::cargo_bin()` from the `assert_cmd` trait is only
available for `std::process::Command`, but we really want to use
`tokio::process::Command` so that everything is async and we can
leverage utilities like `tokio::time::timeout()`. The trick I came up
with was to use `cargo_bin()` to locate the program, and then to use
`std::process::Command::get_program()` when constructing the
`tokio::process::Command`.
											
										
										
											2025-07-21 10:27:07 -07:00
 								/// Test that a shell command that is not on the "trusted" list triggers an
 								/// elicitation request to the MCP and that sending the approval runs the
 								/// command, as expected.
-												fix: try to fix flakiness in test_shell_command_approval_triggers_elicitation (#2344)

I still see flakiness in
`test_shell_command_approval_triggers_elicitation()` on occasion where
`MockServer` claims it has not received all of its expected requests.

I recently introduced a similar type of test in #2264,
`test_codex_jsonrpc_conversation_flow()`, which I have not seen flake
(yet!), so this PR pulls over two things I did in that test:

- increased `worker_threads` from `2` to `4`
- added an assertion to make sure the `task_complete` notification is
received

Honestly, I'm still not sure why `MockServer` claims it sometimes does
not receive all its expected requests given that we assert that the
final `JSONRPCResponse` is read on the stream, but let's give this a
shot.

Assuming this fixes things, my hypothesis is that the increase in
`worker_threads` helps because perhaps there are async tasks in
`MockServer` that do not reliably complete fully when there are not
enough threads available? If that is correct, it seems like the test
would still be flaky, though perhaps with lower frequency?
											
										
										
											2025-08-15 09:17:20 -07:00
+								#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
-												test: add integration test for MCP server (#1633)

This PR introduces a single integration test for `cargo mcp`, though it
also introduces a number of reusable components so that it should be
easier to introduce more integration tests going forward.

The new test is introduced in `codex-rs/mcp-server/tests/elicitation.rs`
and the reusable pieces are in `codex-rs/mcp-server/tests/common`.

The test itself verifies new functionality around elicitations
introduced in https://github.com/openai/codex/pull/1623 (and the fix
introduced in https://github.com/openai/codex/pull/1629) by doing the
following:

- starts a mock model provider with canned responses for
`/v1/chat/completions`
- starts the MCP server with a `config.toml` to use that model provider
(and `approval_policy = "untrusted"`)
- sends the `codex` tool call which causes the mock model provider to
request a shell call for `git init`
- the MCP server sends an elicitation to the client to approve the
request
- the client replies to the elicitation with `"approved"`
- the MCP server runs the command and re-samples the model, getting a
`"finish_reason": "stop"`
- in turn, the MCP server sends the final response to the original
`codex` tool call
- verifies that `git init` ran as expected

To test:

```
cargo test shell_command_approval_triggers_elicitation
```

In writing this test, I discovered that `ExecApprovalResponse` does not
conform to `ElicitResult`, so I added a TODO to fix that, since I think
that should be updated in a separate PR. As it stands, this PR does not
update any business logic, though it does make a number of members of
the `mcp-server` crate `pub` so they can be used in the test.

One additional learning from this PR is that
`std::process::Command::cargo_bin()` from the `assert_cmd` trait is only
available for `std::process::Command`, but we really want to use
`tokio::process::Command` so that everything is async and we can
leverage utilities like `tokio::time::timeout()`. The trick I came up
with was to use `cargo_bin()` to locate the program, and then to use
`std::process::Command::get_program()` when constructing the
`tokio::process::Command`.
											
										
										
											2025-07-21 10:27:07 -07:00
+								async fn test_shell_command_approval_triggers_elicitation() {
-												Add an elicitation for approve patch and refactor tool calls (#1642)

1. Added an elicitation for `approve-patch` which is very similar to
`approve-exec`.
2. Extracted both elicitations to their own files to prevent
`codex_tool_runner` from blowing up in size.
											
										
										
											2025-07-21 23:58:41 -07:00
+								    if env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
-												test: add integration test for MCP server (#1633)

This PR introduces a single integration test for `cargo mcp`, though it
also introduces a number of reusable components so that it should be
easier to introduce more integration tests going forward.

The new test is introduced in `codex-rs/mcp-server/tests/elicitation.rs`
and the reusable pieces are in `codex-rs/mcp-server/tests/common`.

The test itself verifies new functionality around elicitations
introduced in https://github.com/openai/codex/pull/1623 (and the fix
introduced in https://github.com/openai/codex/pull/1629) by doing the
following:

- starts a mock model provider with canned responses for
`/v1/chat/completions`
- starts the MCP server with a `config.toml` to use that model provider
(and `approval_policy = "untrusted"`)
- sends the `codex` tool call which causes the mock model provider to
request a shell call for `git init`
- the MCP server sends an elicitation to the client to approve the
request
- the client replies to the elicitation with `"approved"`
- the MCP server runs the command and re-samples the model, getting a
`"finish_reason": "stop"`
- in turn, the MCP server sends the final response to the original
`codex` tool call
- verifies that `git init` ran as expected

To test:

```
cargo test shell_command_approval_triggers_elicitation
```

In writing this test, I discovered that `ExecApprovalResponse` does not
conform to `ElicitResult`, so I added a TODO to fix that, since I think
that should be updated in a separate PR. As it stands, this PR does not
update any business logic, though it does make a number of members of
the `mcp-server` crate `pub` so they can be used in the test.

One additional learning from this PR is that
`std::process::Command::cargo_bin()` from the `assert_cmd` trait is only
available for `std::process::Command`, but we really want to use
`tokio::process::Command` so that everything is async and we can
leverage utilities like `tokio::time::timeout()`. The trick I came up
with was to use `cargo_bin()` to locate the program, and then to use
`std::process::Command::get_program()` when constructing the
`tokio::process::Command`.
											
										
										
											2025-07-21 10:27:07 -07:00
+								        println!(
 								            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
 								        );
 								        return;
 								    }
 								    // Apparently `#[tokio::test]` must return `()`, so we create a helper
 								    // function that returns `Result` so we can use `?` in favor of `unwrap`.
 								    if let Err(err) = shell_command_approval_triggers_elicitation().await {
 								        panic!("failure: {err}");
 								    }
 								}
 								async fn shell_command_approval_triggers_elicitation() -> anyhow::Result<()> {
-												chore: try to make it easier to debug the flakiness of test_shell_command_approval_triggers_elicitation (#2848)

`test_shell_command_approval_triggers_elicitation()` is one of a number
of integration tests that we have observed to be flaky on GitHub CI, so
this PR tries to reduce the flakiness _and_ to provide us with more
information when it flakes. Specifically:

- Changed the command that we use to trigger the elicitation from `git
init` to `python3 -c 'import pathlib; pathlib.Path(r"{}").touch()'`
because running `git` seems more likely to invite variance.
- Increased the timeout to wait for the task response from 10s to 20s.
- Added more logging.
											
										
										
											2025-08-28 12:33:33 -07:00
+								    // Use a simple, untrusted command that creates a file so we can
 								    // observe a side-effect.
 								    //
 								    // Cross‑platform approach: run a tiny Python snippet to touch the file
 								    // using `python3 -c ...` on all platforms.
-												test: add integration test for MCP server (#1633)

This PR introduces a single integration test for `cargo mcp`, though it
also introduces a number of reusable components so that it should be
easier to introduce more integration tests going forward.

The new test is introduced in `codex-rs/mcp-server/tests/elicitation.rs`
and the reusable pieces are in `codex-rs/mcp-server/tests/common`.

The test itself verifies new functionality around elicitations
introduced in https://github.com/openai/codex/pull/1623 (and the fix
introduced in https://github.com/openai/codex/pull/1629) by doing the
following:

- starts a mock model provider with canned responses for
`/v1/chat/completions`
- starts the MCP server with a `config.toml` to use that model provider
(and `approval_policy = "untrusted"`)
- sends the `codex` tool call which causes the mock model provider to
request a shell call for `git init`
- the MCP server sends an elicitation to the client to approve the
request
- the client replies to the elicitation with `"approved"`
- the MCP server runs the command and re-samples the model, getting a
`"finish_reason": "stop"`
- in turn, the MCP server sends the final response to the original
`codex` tool call
- verifies that `git init` ran as expected

To test:

```
cargo test shell_command_approval_triggers_elicitation
```

In writing this test, I discovered that `ExecApprovalResponse` does not
conform to `ElicitResult`, so I added a TODO to fix that, since I think
that should be updated in a separate PR. As it stands, this PR does not
update any business logic, though it does make a number of members of
the `mcp-server` crate `pub` so they can be used in the test.

One additional learning from this PR is that
`std::process::Command::cargo_bin()` from the `assert_cmd` trait is only
available for `std::process::Command`, but we really want to use
`tokio::process::Command` so that everything is async and we can
leverage utilities like `tokio::time::timeout()`. The trick I came up
with was to use `cargo_bin()` to locate the program, and then to use
`std::process::Command::get_program()` when constructing the
`tokio::process::Command`.
											
										
										
											2025-07-21 10:27:07 -07:00
+								    let workdir_for_shell_function_call = TempDir::new()?;
-												chore: try to make it easier to debug the flakiness of test_shell_command_approval_triggers_elicitation (#2848)

`test_shell_command_approval_triggers_elicitation()` is one of a number
of integration tests that we have observed to be flaky on GitHub CI, so
this PR tries to reduce the flakiness _and_ to provide us with more
information when it flakes. Specifically:

- Changed the command that we use to trigger the elicitation from `git
init` to `python3 -c 'import pathlib; pathlib.Path(r"{}").touch()'`
because running `git` seems more likely to invite variance.
- Increased the timeout to wait for the task response from 10s to 20s.
- Added more logging.
											
										
										
											2025-08-28 12:33:33 -07:00
+								    let created_filename = "created_by_shell_tool.txt";
 								    let created_file = workdir_for_shell_function_call
 								        .path()
 								        .join(created_filename);
 								    let shell_command = vec![
 								        "python3".to_string(),
 								        "-c".to_string(),
 								        format!("import pathlib; pathlib.Path('{created_filename}').touch()"),
 								    ];
-												test: add integration test for MCP server (#1633)

This PR introduces a single integration test for `cargo mcp`, though it
also introduces a number of reusable components so that it should be
easier to introduce more integration tests going forward.

The new test is introduced in `codex-rs/mcp-server/tests/elicitation.rs`
and the reusable pieces are in `codex-rs/mcp-server/tests/common`.

The test itself verifies new functionality around elicitations
introduced in https://github.com/openai/codex/pull/1623 (and the fix
introduced in https://github.com/openai/codex/pull/1629) by doing the
following:

- starts a mock model provider with canned responses for
`/v1/chat/completions`
- starts the MCP server with a `config.toml` to use that model provider
(and `approval_policy = "untrusted"`)
- sends the `codex` tool call which causes the mock model provider to
request a shell call for `git init`
- the MCP server sends an elicitation to the client to approve the
request
- the client replies to the elicitation with `"approved"`
- the MCP server runs the command and re-samples the model, getting a
`"finish_reason": "stop"`
- in turn, the MCP server sends the final response to the original
`codex` tool call
- verifies that `git init` ran as expected

To test:

```
cargo test shell_command_approval_triggers_elicitation
```

In writing this test, I discovered that `ExecApprovalResponse` does not
conform to `ElicitResult`, so I added a TODO to fix that, since I think
that should be updated in a separate PR. As it stands, this PR does not
update any business logic, though it does make a number of members of
the `mcp-server` crate `pub` so they can be used in the test.

One additional learning from this PR is that
`std::process::Command::cargo_bin()` from the `assert_cmd` trait is only
available for `std::process::Command`, but we really want to use
`tokio::process::Command` so that everything is async and we can
leverage utilities like `tokio::time::timeout()`. The trick I came up
with was to use `cargo_bin()` to locate the program, and then to use
`std::process::Command::get_program()` when constructing the
`tokio::process::Command`.
											
										
										
											2025-07-21 10:27:07 -07:00
-												Add an elicitation for approve patch and refactor tool calls (#1642)

1. Added an elicitation for `approve-patch` which is very similar to
`approve-exec`.
2. Extracted both elicitations to their own files to prevent
`codex_tool_runner` from blowing up in size.
											
										
										
											2025-07-21 23:58:41 -07:00
+								    let McpHandle {
 								        process: mut mcp_process,
 								        server: _server,
 								        dir: _dir,
 								    } = create_mcp_process(vec![
-												test: add integration test for MCP server (#1633)

This PR introduces a single integration test for `cargo mcp`, though it
also introduces a number of reusable components so that it should be
easier to introduce more integration tests going forward.

The new test is introduced in `codex-rs/mcp-server/tests/elicitation.rs`
and the reusable pieces are in `codex-rs/mcp-server/tests/common`.

The test itself verifies new functionality around elicitations
introduced in https://github.com/openai/codex/pull/1623 (and the fix
introduced in https://github.com/openai/codex/pull/1629) by doing the
following:

- starts a mock model provider with canned responses for
`/v1/chat/completions`
- starts the MCP server with a `config.toml` to use that model provider
(and `approval_policy = "untrusted"`)
- sends the `codex` tool call which causes the mock model provider to
request a shell call for `git init`
- the MCP server sends an elicitation to the client to approve the
request
- the client replies to the elicitation with `"approved"`
- the MCP server runs the command and re-samples the model, getting a
`"finish_reason": "stop"`
- in turn, the MCP server sends the final response to the original
`codex` tool call
- verifies that `git init` ran as expected

To test:

```
cargo test shell_command_approval_triggers_elicitation
```

In writing this test, I discovered that `ExecApprovalResponse` does not
conform to `ElicitResult`, so I added a TODO to fix that, since I think
that should be updated in a separate PR. As it stands, this PR does not
update any business logic, though it does make a number of members of
the `mcp-server` crate `pub` so they can be used in the test.

One additional learning from this PR is that
`std::process::Command::cargo_bin()` from the `assert_cmd` trait is only
available for `std::process::Command`, but we really want to use
`tokio::process::Command` so that everything is async and we can
leverage utilities like `tokio::time::timeout()`. The trick I came up
with was to use `cargo_bin()` to locate the program, and then to use
`std::process::Command::get_program()` when constructing the
`tokio::process::Command`.
											
										
										
											2025-07-21 10:27:07 -07:00
+								        create_shell_sse_response(
 								            shell_command.clone(),
 								            Some(workdir_for_shell_function_call.path()),
 								            Some(5_000),
 								            "call1234",
 								        )?,
-												chore: try to make it easier to debug the flakiness of test_shell_command_approval_triggers_elicitation (#2848)

`test_shell_command_approval_triggers_elicitation()` is one of a number
of integration tests that we have observed to be flaky on GitHub CI, so
this PR tries to reduce the flakiness _and_ to provide us with more
information when it flakes. Specifically:

- Changed the command that we use to trigger the elicitation from `git
init` to `python3 -c 'import pathlib; pathlib.Path(r"{}").touch()'`
because running `git` seems more likely to invite variance.
- Increased the timeout to wait for the task response from 10s to 20s.
- Added more logging.
											
										
										
											2025-08-28 12:33:33 -07:00
+								        create_final_assistant_message_sse_response("File created!")?,
-												test: add integration test for MCP server (#1633)

This PR introduces a single integration test for `cargo mcp`, though it
also introduces a number of reusable components so that it should be
easier to introduce more integration tests going forward.

The new test is introduced in `codex-rs/mcp-server/tests/elicitation.rs`
and the reusable pieces are in `codex-rs/mcp-server/tests/common`.

The test itself verifies new functionality around elicitations
introduced in https://github.com/openai/codex/pull/1623 (and the fix
introduced in https://github.com/openai/codex/pull/1629) by doing the
following:

- starts a mock model provider with canned responses for
`/v1/chat/completions`
- starts the MCP server with a `config.toml` to use that model provider
(and `approval_policy = "untrusted"`)
- sends the `codex` tool call which causes the mock model provider to
request a shell call for `git init`
- the MCP server sends an elicitation to the client to approve the
request
- the client replies to the elicitation with `"approved"`
- the MCP server runs the command and re-samples the model, getting a
`"finish_reason": "stop"`
- in turn, the MCP server sends the final response to the original
`codex` tool call
- verifies that `git init` ran as expected

To test:

```
cargo test shell_command_approval_triggers_elicitation
```

In writing this test, I discovered that `ExecApprovalResponse` does not
conform to `ElicitResult`, so I added a TODO to fix that, since I think
that should be updated in a separate PR. As it stands, this PR does not
update any business logic, though it does make a number of members of
the `mcp-server` crate `pub` so they can be used in the test.

One additional learning from this PR is that
`std::process::Command::cargo_bin()` from the `assert_cmd` trait is only
available for `std::process::Command`, but we really want to use
`tokio::process::Command` so that everything is async and we can
leverage utilities like `tokio::time::timeout()`. The trick I came up
with was to use `cargo_bin()` to locate the program, and then to use
`std::process::Command::get_program()` when constructing the
`tokio::process::Command`.
											
										
										
											2025-07-21 10:27:07 -07:00
+								    ])
-												Add an elicitation for approve patch and refactor tool calls (#1642)

1. Added an elicitation for `approve-patch` which is very similar to
`approve-exec`.
2. Extracted both elicitations to their own files to prevent
`codex_tool_runner` from blowing up in size.
											
										
										
											2025-07-21 23:58:41 -07:00
+								    .await?;
-												test: add integration test for MCP server (#1633)

This PR introduces a single integration test for `cargo mcp`, though it
also introduces a number of reusable components so that it should be
easier to introduce more integration tests going forward.

The new test is introduced in `codex-rs/mcp-server/tests/elicitation.rs`
and the reusable pieces are in `codex-rs/mcp-server/tests/common`.

The test itself verifies new functionality around elicitations
introduced in https://github.com/openai/codex/pull/1623 (and the fix
introduced in https://github.com/openai/codex/pull/1629) by doing the
following:

- starts a mock model provider with canned responses for
`/v1/chat/completions`
- starts the MCP server with a `config.toml` to use that model provider
(and `approval_policy = "untrusted"`)
- sends the `codex` tool call which causes the mock model provider to
request a shell call for `git init`
- the MCP server sends an elicitation to the client to approve the
request
- the client replies to the elicitation with `"approved"`
- the MCP server runs the command and re-samples the model, getting a
`"finish_reason": "stop"`
- in turn, the MCP server sends the final response to the original
`codex` tool call
- verifies that `git init` ran as expected

To test:

```
cargo test shell_command_approval_triggers_elicitation
```

In writing this test, I discovered that `ExecApprovalResponse` does not
conform to `ElicitResult`, so I added a TODO to fix that, since I think
that should be updated in a separate PR. As it stands, this PR does not
update any business logic, though it does make a number of members of
the `mcp-server` crate `pub` so they can be used in the test.

One additional learning from this PR is that
`std::process::Command::cargo_bin()` from the `assert_cmd` trait is only
available for `std::process::Command`, but we really want to use
`tokio::process::Command` so that everything is async and we can
leverage utilities like `tokio::time::timeout()`. The trick I came up
with was to use `cargo_bin()` to locate the program, and then to use
`std::process::Command::get_program()` when constructing the
`tokio::process::Command`.
											
										
										
											2025-07-21 10:27:07 -07:00
 								    // Send a "codex" tool request, which should hit the completions endpoint.
 								    // In turn, it should reply with a tool call, which the MCP should forward
 								    // as an elicitation.
-												Add an elicitation for approve patch and refactor tool calls (#1642)

1. Added an elicitation for `approve-patch` which is very similar to
`approve-exec`.
2. Extracted both elicitations to their own files to prevent
`codex_tool_runner` from blowing up in size.
											
										
										
											2025-07-21 23:58:41 -07:00
+								    let codex_request_id = mcp_process
-												Add support for custom base instructions (#1645)

Allows providing custom instructions file as a config parameter and
custom instruction text via MCP tool call.
											
										
										
											2025-07-22 09:42:22 -07:00
+								        .send_codex_tool_call(CodexToolCallParam {
 								            prompt: "run `git init`".to_string(),
 								            ..Default::default()
 								        })
-												Add an elicitation for approve patch and refactor tool calls (#1642)

1. Added an elicitation for `approve-patch` which is very similar to
`approve-exec`.
2. Extracted both elicitations to their own files to prevent
`codex_tool_runner` from blowing up in size.
											
										
										
											2025-07-21 23:58:41 -07:00
+								        .await?;
-												test: add integration test for MCP server (#1633)

This PR introduces a single integration test for `cargo mcp`, though it
also introduces a number of reusable components so that it should be
easier to introduce more integration tests going forward.

The new test is introduced in `codex-rs/mcp-server/tests/elicitation.rs`
and the reusable pieces are in `codex-rs/mcp-server/tests/common`.

The test itself verifies new functionality around elicitations
introduced in https://github.com/openai/codex/pull/1623 (and the fix
introduced in https://github.com/openai/codex/pull/1629) by doing the
following:

- starts a mock model provider with canned responses for
`/v1/chat/completions`
- starts the MCP server with a `config.toml` to use that model provider
(and `approval_policy = "untrusted"`)
- sends the `codex` tool call which causes the mock model provider to
request a shell call for `git init`
- the MCP server sends an elicitation to the client to approve the
request
- the client replies to the elicitation with `"approved"`
- the MCP server runs the command and re-samples the model, getting a
`"finish_reason": "stop"`
- in turn, the MCP server sends the final response to the original
`codex` tool call
- verifies that `git init` ran as expected

To test:

```
cargo test shell_command_approval_triggers_elicitation
```

In writing this test, I discovered that `ExecApprovalResponse` does not
conform to `ElicitResult`, so I added a TODO to fix that, since I think
that should be updated in a separate PR. As it stands, this PR does not
update any business logic, though it does make a number of members of
the `mcp-server` crate `pub` so they can be used in the test.

One additional learning from this PR is that
`std::process::Command::cargo_bin()` from the `assert_cmd` trait is only
available for `std::process::Command`, but we really want to use
`tokio::process::Command` so that everything is async and we can
leverage utilities like `tokio::time::timeout()`. The trick I came up
with was to use `cargo_bin()` to locate the program, and then to use
`std::process::Command::get_program()` when constructing the
`tokio::process::Command`.
											
										
										
											2025-07-21 10:27:07 -07:00
+								    let elicitation_request = timeout(
 								        DEFAULT_READ_TIMEOUT,
 								        mcp_process.read_stream_until_request_message(),
 								    )
 								    .await??;
-												Fix flakiness in shell command approval test (#2547)

## Summary
- read the shell exec approval request's actual id instead of assuming
it is always 0
- use that id when validating and responding in the test

## Testing
- `cargo test -p codex-mcp-server
test_shell_command_approval_triggers_elicitation`

------
https://chatgpt.com/codex/tasks/task_i_68a6ab9c732c832c81522cbf11812be0
											
										
										
											2025-08-22 15:46:35 -07:00
+								    let elicitation_request_id = elicitation_request.id.clone();
-												Fix flaky test_shell_command_approval_triggers_elicitation test (#1802)

This doesn't flake very often but this should fix it.
											
										
										
											2025-08-03 07:19:12 -07:00
+								    let params = serde_json::from_value::<ExecApprovalElicitRequestParams>(
 								        elicitation_request
 								            .params
 								            .clone()
 								            .ok_or_else(|| anyhow::anyhow!("elicitation_request.params must be set"))?,
 								    )?;
-												test: add integration test for MCP server (#1633)

This PR introduces a single integration test for `cargo mcp`, though it
also introduces a number of reusable components so that it should be
easier to introduce more integration tests going forward.

The new test is introduced in `codex-rs/mcp-server/tests/elicitation.rs`
and the reusable pieces are in `codex-rs/mcp-server/tests/common`.

The test itself verifies new functionality around elicitations
introduced in https://github.com/openai/codex/pull/1623 (and the fix
introduced in https://github.com/openai/codex/pull/1629) by doing the
following:

- starts a mock model provider with canned responses for
`/v1/chat/completions`
- starts the MCP server with a `config.toml` to use that model provider
(and `approval_policy = "untrusted"`)
- sends the `codex` tool call which causes the mock model provider to
request a shell call for `git init`
- the MCP server sends an elicitation to the client to approve the
request
- the client replies to the elicitation with `"approved"`
- the MCP server runs the command and re-samples the model, getting a
`"finish_reason": "stop"`
- in turn, the MCP server sends the final response to the original
`codex` tool call
- verifies that `git init` ran as expected

To test:

```
cargo test shell_command_approval_triggers_elicitation
```

In writing this test, I discovered that `ExecApprovalResponse` does not
conform to `ElicitResult`, so I added a TODO to fix that, since I think
that should be updated in a separate PR. As it stands, this PR does not
update any business logic, though it does make a number of members of
the `mcp-server` crate `pub` so they can be used in the test.

One additional learning from this PR is that
`std::process::Command::cargo_bin()` from the `assert_cmd` trait is only
available for `std::process::Command`, but we really want to use
`tokio::process::Command` so that everything is async and we can
leverage utilities like `tokio::time::timeout()`. The trick I came up
with was to use `cargo_bin()` to locate the program, and then to use
`std::process::Command::get_program()` when constructing the
`tokio::process::Command`.
											
										
										
											2025-07-21 10:27:07 -07:00
+								    let expected_elicitation_request = create_expected_elicitation_request(
 								        elicitation_request_id.clone(),
 								        shell_command.clone(),
 								        workdir_for_shell_function_call.path(),
 								        codex_request_id.to_string(),
-												Fix flaky test_shell_command_approval_triggers_elicitation test (#1802)

This doesn't flake very often but this should fix it.
											
										
										
											2025-08-03 07:19:12 -07:00
+								        params.codex_event_id.clone(),
-												test: add integration test for MCP server (#1633)

This PR introduces a single integration test for `cargo mcp`, though it
also introduces a number of reusable components so that it should be
easier to introduce more integration tests going forward.

The new test is introduced in `codex-rs/mcp-server/tests/elicitation.rs`
and the reusable pieces are in `codex-rs/mcp-server/tests/common`.

The test itself verifies new functionality around elicitations
introduced in https://github.com/openai/codex/pull/1623 (and the fix
introduced in https://github.com/openai/codex/pull/1629) by doing the
following:

- starts a mock model provider with canned responses for
`/v1/chat/completions`
- starts the MCP server with a `config.toml` to use that model provider
(and `approval_policy = "untrusted"`)
- sends the `codex` tool call which causes the mock model provider to
request a shell call for `git init`
- the MCP server sends an elicitation to the client to approve the
request
- the client replies to the elicitation with `"approved"`
- the MCP server runs the command and re-samples the model, getting a
`"finish_reason": "stop"`
- in turn, the MCP server sends the final response to the original
`codex` tool call
- verifies that `git init` ran as expected

To test:

```
cargo test shell_command_approval_triggers_elicitation
```

In writing this test, I discovered that `ExecApprovalResponse` does not
conform to `ElicitResult`, so I added a TODO to fix that, since I think
that should be updated in a separate PR. As it stands, this PR does not
update any business logic, though it does make a number of members of
the `mcp-server` crate `pub` so they can be used in the test.

One additional learning from this PR is that
`std::process::Command::cargo_bin()` from the `assert_cmd` trait is only
available for `std::process::Command`, but we really want to use
`tokio::process::Command` so that everything is async and we can
leverage utilities like `tokio::time::timeout()`. The trick I came up
with was to use `cargo_bin()` to locate the program, and then to use
`std::process::Command::get_program()` when constructing the
`tokio::process::Command`.
											
										
										
											2025-07-21 10:27:07 -07:00
+								    )?;
 								    assert_eq!(expected_elicitation_request, elicitation_request);
 								    // Accept the `git init` request by responding to the elicitation.
 								    mcp_process
 								        .send_response(
 								            elicitation_request_id,
 								            serde_json::to_value(ExecApprovalResponse {
 								                decision: ReviewDecision::Approved,
 								            })?,
 								        )
 								        .await?;
-												fix: try to fix flakiness in test_shell_command_approval_triggers_elicitation (#2344)

I still see flakiness in
`test_shell_command_approval_triggers_elicitation()` on occasion where
`MockServer` claims it has not received all of its expected requests.

I recently introduced a similar type of test in #2264,
`test_codex_jsonrpc_conversation_flow()`, which I have not seen flake
(yet!), so this PR pulls over two things I did in that test:

- increased `worker_threads` from `2` to `4`
- added an assertion to make sure the `task_complete` notification is
received

Honestly, I'm still not sure why `MockServer` claims it sometimes does
not receive all its expected requests given that we assert that the
final `JSONRPCResponse` is read on the stream, but let's give this a
shot.

Assuming this fixes things, my hypothesis is that the increase in
`worker_threads` helps because perhaps there are async tasks in
`MockServer` that do not reliably complete fully when there are not
enough threads available? If that is correct, it seems like the test
would still be flaky, though perhaps with lower frequency?
											
										
										
											2025-08-15 09:17:20 -07:00
+								    // Verify task_complete notification arrives before the tool call completes.
 								    #[expect(clippy::expect_used)]
 								    let _task_complete = timeout(
 								        DEFAULT_READ_TIMEOUT,
 								        mcp_process.read_stream_until_legacy_task_complete_notification(),
 								    )
 								    .await
 								    .expect("task_complete_notification timeout")
 								    .expect("task_complete_notification resp");
-												chore: try to make it easier to debug the flakiness of test_shell_command_approval_triggers_elicitation (#2848)

`test_shell_command_approval_triggers_elicitation()` is one of a number
of integration tests that we have observed to be flaky on GitHub CI, so
this PR tries to reduce the flakiness _and_ to provide us with more
information when it flakes. Specifically:

- Changed the command that we use to trigger the elicitation from `git
init` to `python3 -c 'import pathlib; pathlib.Path(r"{}").touch()'`
because running `git` seems more likely to invite variance.
- Increased the timeout to wait for the task response from 10s to 20s.
- Added more logging.
											
										
										
											2025-08-28 12:33:33 -07:00
+								    // Verify the original `codex` tool call completes and that the file was created.
-												test: add integration test for MCP server (#1633)

This PR introduces a single integration test for `cargo mcp`, though it
also introduces a number of reusable components so that it should be
easier to introduce more integration tests going forward.

The new test is introduced in `codex-rs/mcp-server/tests/elicitation.rs`
and the reusable pieces are in `codex-rs/mcp-server/tests/common`.

The test itself verifies new functionality around elicitations
introduced in https://github.com/openai/codex/pull/1623 (and the fix
introduced in https://github.com/openai/codex/pull/1629) by doing the
following:

- starts a mock model provider with canned responses for
`/v1/chat/completions`
- starts the MCP server with a `config.toml` to use that model provider
(and `approval_policy = "untrusted"`)
- sends the `codex` tool call which causes the mock model provider to
request a shell call for `git init`
- the MCP server sends an elicitation to the client to approve the
request
- the client replies to the elicitation with `"approved"`
- the MCP server runs the command and re-samples the model, getting a
`"finish_reason": "stop"`
- in turn, the MCP server sends the final response to the original
`codex` tool call
- verifies that `git init` ran as expected

To test:

```
cargo test shell_command_approval_triggers_elicitation
```

In writing this test, I discovered that `ExecApprovalResponse` does not
conform to `ElicitResult`, so I added a TODO to fix that, since I think
that should be updated in a separate PR. As it stands, this PR does not
update any business logic, though it does make a number of members of
the `mcp-server` crate `pub` so they can be used in the test.

One additional learning from this PR is that
`std::process::Command::cargo_bin()` from the `assert_cmd` trait is only
available for `std::process::Command`, but we really want to use
`tokio::process::Command` so that everything is async and we can
leverage utilities like `tokio::time::timeout()`. The trick I came up
with was to use `cargo_bin()` to locate the program, and then to use
`std::process::Command::get_program()` when constructing the
`tokio::process::Command`.
											
										
										
											2025-07-21 10:27:07 -07:00
+								    let codex_response = timeout(
 								        DEFAULT_READ_TIMEOUT,
 								        mcp_process.read_stream_until_response_message(RequestId::Integer(codex_request_id)),
 								    )
 								    .await??;
 								    assert_eq!(
 								        JSONRPCResponse {
 								            jsonrpc: JSONRPC_VERSION.into(),
 								            id: RequestId::Integer(codex_request_id),
 								            result: json!({
 								                "content": [
 								                    {
-												chore: try to make it easier to debug the flakiness of test_shell_command_approval_triggers_elicitation (#2848)

`test_shell_command_approval_triggers_elicitation()` is one of a number
of integration tests that we have observed to be flaky on GitHub CI, so
this PR tries to reduce the flakiness _and_ to provide us with more
information when it flakes. Specifically:

- Changed the command that we use to trigger the elicitation from `git
init` to `python3 -c 'import pathlib; pathlib.Path(r"{}").touch()'`
because running `git` seems more likely to invite variance.
- Increased the timeout to wait for the task response from 10s to 20s.
- Added more logging.
											
										
										
											2025-08-28 12:33:33 -07:00
+								                        "text": "File created!",
-												test: add integration test for MCP server (#1633)

This PR introduces a single integration test for `cargo mcp`, though it
also introduces a number of reusable components so that it should be
easier to introduce more integration tests going forward.

The new test is introduced in `codex-rs/mcp-server/tests/elicitation.rs`
and the reusable pieces are in `codex-rs/mcp-server/tests/common`.

The test itself verifies new functionality around elicitations
introduced in https://github.com/openai/codex/pull/1623 (and the fix
introduced in https://github.com/openai/codex/pull/1629) by doing the
following:

- starts a mock model provider with canned responses for
`/v1/chat/completions`
- starts the MCP server with a `config.toml` to use that model provider
(and `approval_policy = "untrusted"`)
- sends the `codex` tool call which causes the mock model provider to
request a shell call for `git init`
- the MCP server sends an elicitation to the client to approve the
request
- the client replies to the elicitation with `"approved"`
- the MCP server runs the command and re-samples the model, getting a
`"finish_reason": "stop"`
- in turn, the MCP server sends the final response to the original
`codex` tool call
- verifies that `git init` ran as expected

To test:

```
cargo test shell_command_approval_triggers_elicitation
```

In writing this test, I discovered that `ExecApprovalResponse` does not
conform to `ElicitResult`, so I added a TODO to fix that, since I think
that should be updated in a separate PR. As it stands, this PR does not
update any business logic, though it does make a number of members of
the `mcp-server` crate `pub` so they can be used in the test.

One additional learning from this PR is that
`std::process::Command::cargo_bin()` from the `assert_cmd` trait is only
available for `std::process::Command`, but we really want to use
`tokio::process::Command` so that everything is async and we can
leverage utilities like `tokio::time::timeout()`. The trick I came up
with was to use `cargo_bin()` to locate the program, and then to use
`std::process::Command::get_program()` when constructing the
`tokio::process::Command`.
											
										
										
											2025-07-21 10:27:07 -07:00
+								                        "type": "text"
 								                    }
 								                ]
 								            }),
 								        },
 								        codex_response
 								    );
-												chore: try to make it easier to debug the flakiness of test_shell_command_approval_triggers_elicitation (#2848)

`test_shell_command_approval_triggers_elicitation()` is one of a number
of integration tests that we have observed to be flaky on GitHub CI, so
this PR tries to reduce the flakiness _and_ to provide us with more
information when it flakes. Specifically:

- Changed the command that we use to trigger the elicitation from `git
init` to `python3 -c 'import pathlib; pathlib.Path(r"{}").touch()'`
because running `git` seems more likely to invite variance.
- Increased the timeout to wait for the task response from 10s to 20s.
- Added more logging.
											
										
										
											2025-08-28 12:33:33 -07:00
+								    assert!(created_file.is_file(), "created file should exist");
-												test: add integration test for MCP server (#1633)

This PR introduces a single integration test for `cargo mcp`, though it
also introduces a number of reusable components so that it should be
easier to introduce more integration tests going forward.

The new test is introduced in `codex-rs/mcp-server/tests/elicitation.rs`
and the reusable pieces are in `codex-rs/mcp-server/tests/common`.

The test itself verifies new functionality around elicitations
introduced in https://github.com/openai/codex/pull/1623 (and the fix
introduced in https://github.com/openai/codex/pull/1629) by doing the
following:

- starts a mock model provider with canned responses for
`/v1/chat/completions`
- starts the MCP server with a `config.toml` to use that model provider
(and `approval_policy = "untrusted"`)
- sends the `codex` tool call which causes the mock model provider to
request a shell call for `git init`
- the MCP server sends an elicitation to the client to approve the
request
- the client replies to the elicitation with `"approved"`
- the MCP server runs the command and re-samples the model, getting a
`"finish_reason": "stop"`
- in turn, the MCP server sends the final response to the original
`codex` tool call
- verifies that `git init` ran as expected

To test:

```
cargo test shell_command_approval_triggers_elicitation
```

In writing this test, I discovered that `ExecApprovalResponse` does not
conform to `ElicitResult`, so I added a TODO to fix that, since I think
that should be updated in a separate PR. As it stands, this PR does not
update any business logic, though it does make a number of members of
the `mcp-server` crate `pub` so they can be used in the test.

One additional learning from this PR is that
`std::process::Command::cargo_bin()` from the `assert_cmd` trait is only
available for `std::process::Command`, but we really want to use
`tokio::process::Command` so that everything is async and we can
leverage utilities like `tokio::time::timeout()`. The trick I came up
with was to use `cargo_bin()` to locate the program, and then to use
`std::process::Command::get_program()` when constructing the
`tokio::process::Command`.
											
										
										
											2025-07-21 10:27:07 -07:00
 								    Ok(())
 								}
 								fn create_expected_elicitation_request(
 								    elicitation_request_id: RequestId,
 								    command: Vec<String>,
 								    workdir: &Path,
 								    codex_mcp_tool_call_id: String,
 								    codex_event_id: String,
 								) -> anyhow::Result<JSONRPCRequest> {
 								    let expected_message = format!(
 								        "Allow Codex to run `{}` in `{}`?",
-												chore: clippy on redundant closure (#4058)

Add redundant closure clippy rules and let Codex fix it by minimising
FQP
											
										
										
											2025-09-22 20:30:16 +01:00
+								        shlex::try_join(command.iter().map(std::convert::AsRef::as_ref))?,
-												test: add integration test for MCP server (#1633)

This PR introduces a single integration test for `cargo mcp`, though it
also introduces a number of reusable components so that it should be
easier to introduce more integration tests going forward.

The new test is introduced in `codex-rs/mcp-server/tests/elicitation.rs`
and the reusable pieces are in `codex-rs/mcp-server/tests/common`.

The test itself verifies new functionality around elicitations
introduced in https://github.com/openai/codex/pull/1623 (and the fix
introduced in https://github.com/openai/codex/pull/1629) by doing the
following:

- starts a mock model provider with canned responses for
`/v1/chat/completions`
- starts the MCP server with a `config.toml` to use that model provider
(and `approval_policy = "untrusted"`)
- sends the `codex` tool call which causes the mock model provider to
request a shell call for `git init`
- the MCP server sends an elicitation to the client to approve the
request
- the client replies to the elicitation with `"approved"`
- the MCP server runs the command and re-samples the model, getting a
`"finish_reason": "stop"`
- in turn, the MCP server sends the final response to the original
`codex` tool call
- verifies that `git init` ran as expected

To test:

```
cargo test shell_command_approval_triggers_elicitation
```

In writing this test, I discovered that `ExecApprovalResponse` does not
conform to `ElicitResult`, so I added a TODO to fix that, since I think
that should be updated in a separate PR. As it stands, this PR does not
update any business logic, though it does make a number of members of
the `mcp-server` crate `pub` so they can be used in the test.

One additional learning from this PR is that
`std::process::Command::cargo_bin()` from the `assert_cmd` trait is only
available for `std::process::Command`, but we really want to use
`tokio::process::Command` so that everything is async and we can
leverage utilities like `tokio::time::timeout()`. The trick I came up
with was to use `cargo_bin()` to locate the program, and then to use
`std::process::Command::get_program()` when constructing the
`tokio::process::Command`.
											
										
										
											2025-07-21 10:27:07 -07:00
+								        workdir.to_string_lossy()
 								    );
-												feat: add Vec<ParsedCommand> to ExecApprovalRequestEvent (#5222)

This adds `parsed_cmd: Vec<ParsedCommand>` to `ExecApprovalRequestEvent`
in the core protocol (`protocol/src/protocol.rs`), which is also what
this field is named on `ExecCommandBeginEvent`. Honestly, I don't love
the name (it sounds like a single command, but it is actually a list of
them), but I don't want to get distracted by a naming discussion right
now.

This also adds `parsed_cmd` to `ExecCommandApprovalParams` in
`codex-rs/app-server-protocol/src/protocol.rs`, so it will be available
via `codex app-server`, as well.

For consistency, I also updated `ExecApprovalElicitRequestParams` in
`codex-rs/mcp-server/src/exec_approval.rs` to include this field under
the name `codex_parsed_cmd`, as that struct already has a number of
special `codex_*` fields. Note this is the code for when Codex is used
as an MCP _server_ and therefore has to conform to the official spec for
an MCP elicitation type.
											
										
										
											2025-10-15 13:58:40 -07:00
+								    let codex_parsed_cmd = parse_command::parse_command(&command);
-												test: add integration test for MCP server (#1633)

This PR introduces a single integration test for `cargo mcp`, though it
also introduces a number of reusable components so that it should be
easier to introduce more integration tests going forward.

The new test is introduced in `codex-rs/mcp-server/tests/elicitation.rs`
and the reusable pieces are in `codex-rs/mcp-server/tests/common`.

The test itself verifies new functionality around elicitations
introduced in https://github.com/openai/codex/pull/1623 (and the fix
introduced in https://github.com/openai/codex/pull/1629) by doing the
following:

- starts a mock model provider with canned responses for
`/v1/chat/completions`
- starts the MCP server with a `config.toml` to use that model provider
(and `approval_policy = "untrusted"`)
- sends the `codex` tool call which causes the mock model provider to
request a shell call for `git init`
- the MCP server sends an elicitation to the client to approve the
request
- the client replies to the elicitation with `"approved"`
- the MCP server runs the command and re-samples the model, getting a
`"finish_reason": "stop"`
- in turn, the MCP server sends the final response to the original
`codex` tool call
- verifies that `git init` ran as expected

To test:

```
cargo test shell_command_approval_triggers_elicitation
```

In writing this test, I discovered that `ExecApprovalResponse` does not
conform to `ElicitResult`, so I added a TODO to fix that, since I think
that should be updated in a separate PR. As it stands, this PR does not
update any business logic, though it does make a number of members of
the `mcp-server` crate `pub` so they can be used in the test.

One additional learning from this PR is that
`std::process::Command::cargo_bin()` from the `assert_cmd` trait is only
available for `std::process::Command`, but we really want to use
`tokio::process::Command` so that everything is async and we can
leverage utilities like `tokio::time::timeout()`. The trick I came up
with was to use `cargo_bin()` to locate the program, and then to use
`std::process::Command::get_program()` when constructing the
`tokio::process::Command`.
											
										
										
											2025-07-21 10:27:07 -07:00
+								    Ok(JSONRPCRequest {
 								        jsonrpc: JSONRPC_VERSION.into(),
 								        id: elicitation_request_id,
 								        method: ElicitRequest::METHOD.to_string(),
 								        params: Some(serde_json::to_value(&ExecApprovalElicitRequestParams {
 								            message: expected_message,
 								            requested_schema: ElicitRequestParamsRequestedSchema {
 								                r#type: "object".to_string(),
 								                properties: json!({}),
 								                required: None,
 								            },
 								            codex_elicitation: "exec-approval".to_string(),
 								            codex_mcp_tool_call_id,
 								            codex_event_id,
 								            codex_command: command,
 								            codex_cwd: workdir.to_path_buf(),
-												Add call_id to patch approvals and elicitations (#1660)

Builds on https://github.com/openai/codex/pull/1659 and adds call_id to
a few more places for the same reason.
											
										
										
											2025-07-23 12:55:35 -07:00
+								            codex_call_id: "call1234".to_string(),
-												feat: add Vec<ParsedCommand> to ExecApprovalRequestEvent (#5222)

This adds `parsed_cmd: Vec<ParsedCommand>` to `ExecApprovalRequestEvent`
in the core protocol (`protocol/src/protocol.rs`), which is also what
this field is named on `ExecCommandBeginEvent`. Honestly, I don't love
the name (it sounds like a single command, but it is actually a list of
them), but I don't want to get distracted by a naming discussion right
now.

This also adds `parsed_cmd` to `ExecCommandApprovalParams` in
`codex-rs/app-server-protocol/src/protocol.rs`, so it will be available
via `codex app-server`, as well.

For consistency, I also updated `ExecApprovalElicitRequestParams` in
`codex-rs/mcp-server/src/exec_approval.rs` to include this field under
the name `codex_parsed_cmd`, as that struct already has a number of
special `codex_*` fields. Note this is the code for when Codex is used
as an MCP _server_ and therefore has to conform to the official spec for
an MCP elicitation type.
											
										
										
											2025-10-15 13:58:40 -07:00
+								            codex_parsed_cmd,
-												Added model summary and risk assessment for commands that violate sandbox policy (#5536)

This PR adds support for a model-based summary and risk assessment for
commands that violate the sandbox policy and require user approval. This
aids the user in evaluating whether the command should be approved.

The feature works by taking a failed command and passing it back to the
model and asking it to summarize the command, give it a risk level (low,
medium, high) and a risk category (e.g. "data deletion" or "data
exfiltration"). It uses a new conversation thread so the context in the
existing thread doesn't influence the answer. If the call to the model
fails or takes longer than 5 seconds, it falls back to the current
behavior.

For now, this is an experimental feature and is gated by a config key
`experimental_sandbox_command_assessment`.

Here is a screen shot of the approval prompt showing the risk assessment
and summary.

<img width="723" height="282" alt="image"
src="https://github.com/user-attachments/assets/4597dd7c-d5a0-4e9f-9d13-414bd082fd6b"
/>
											
										
										
											2025-10-24 17:23:44 -05:00
+								            codex_risk: None,
-												test: add integration test for MCP server (#1633)

This PR introduces a single integration test for `cargo mcp`, though it
also introduces a number of reusable components so that it should be
easier to introduce more integration tests going forward.

The new test is introduced in `codex-rs/mcp-server/tests/elicitation.rs`
and the reusable pieces are in `codex-rs/mcp-server/tests/common`.

The test itself verifies new functionality around elicitations
introduced in https://github.com/openai/codex/pull/1623 (and the fix
introduced in https://github.com/openai/codex/pull/1629) by doing the
following:

- starts a mock model provider with canned responses for
`/v1/chat/completions`
- starts the MCP server with a `config.toml` to use that model provider
(and `approval_policy = "untrusted"`)
- sends the `codex` tool call which causes the mock model provider to
request a shell call for `git init`
- the MCP server sends an elicitation to the client to approve the
request
- the client replies to the elicitation with `"approved"`
- the MCP server runs the command and re-samples the model, getting a
`"finish_reason": "stop"`
- in turn, the MCP server sends the final response to the original
`codex` tool call
- verifies that `git init` ran as expected

To test:

```
cargo test shell_command_approval_triggers_elicitation
```

In writing this test, I discovered that `ExecApprovalResponse` does not
conform to `ElicitResult`, so I added a TODO to fix that, since I think
that should be updated in a separate PR. As it stands, this PR does not
update any business logic, though it does make a number of members of
the `mcp-server` crate `pub` so they can be used in the test.

One additional learning from this PR is that
`std::process::Command::cargo_bin()` from the `assert_cmd` trait is only
available for `std::process::Command`, but we really want to use
`tokio::process::Command` so that everything is async and we can
leverage utilities like `tokio::time::timeout()`. The trick I came up
with was to use `cargo_bin()` to locate the program, and then to use
`std::process::Command::get_program()` when constructing the
`tokio::process::Command`.
											
										
										
											2025-07-21 10:27:07 -07:00
+								        })?),
 								    })
 								}
-												Add an elicitation for approve patch and refactor tool calls (#1642)

1. Added an elicitation for `approve-patch` which is very similar to
`approve-exec`.
2. Extracted both elicitations to their own files to prevent
`codex_tool_runner` from blowing up in size.
											
										
										
											2025-07-21 23:58:41 -07:00
 								/// Test that patch approval triggers an elicitation request to the MCP and that
 								/// sending the approval applies the patch, as expected.
 								#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 								async fn test_patch_approval_triggers_elicitation() {
 								    if env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
 								        println!(
 								            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
 								        );
 								        return;
 								    }
 								    if let Err(err) = patch_approval_triggers_elicitation().await {
 								        panic!("failure: {err}");
 								    }
 								}
 								async fn patch_approval_triggers_elicitation() -> anyhow::Result<()> {
 								    let cwd = TempDir::new()?;
 								    let test_file = cwd.path().join("destination_file.txt");
 								    std::fs::write(&test_file, "original content\n")?;
 								    let patch_content = format!(
 								        "*** Begin Patch\n*** Update File: {}\n-original content\n+modified content\n*** End Patch",
 								        test_file.as_path().to_string_lossy()
 								    );
 								    let McpHandle {
 								        process: mut mcp_process,
 								        server: _server,
 								        dir: _dir,
 								    } = create_mcp_process(vec![
 								        create_apply_patch_sse_response(&patch_content, "call1234")?,
 								        create_final_assistant_message_sse_response("Patch has been applied successfully!")?,
 								    ])
 								    .await?;
 								    // Send a "codex" tool request that will trigger the apply_patch command
 								    let codex_request_id = mcp_process
-												Add support for custom base instructions (#1645)

Allows providing custom instructions file as a config parameter and
custom instruction text via MCP tool call.
											
										
										
											2025-07-22 09:42:22 -07:00
+								        .send_codex_tool_call(CodexToolCallParam {
 								            cwd: Some(cwd.path().to_string_lossy().to_string()),
 								            prompt: "please modify the test file".to_string(),
 								            ..Default::default()
 								        })
-												Add an elicitation for approve patch and refactor tool calls (#1642)

1. Added an elicitation for `approve-patch` which is very similar to
`approve-exec`.
2. Extracted both elicitations to their own files to prevent
`codex_tool_runner` from blowing up in size.
											
										
										
											2025-07-21 23:58:41 -07:00
+								        .await?;
 								    let elicitation_request = timeout(
 								        DEFAULT_READ_TIMEOUT,
 								        mcp_process.read_stream_until_request_message(),
 								    )
 								    .await??;
 								    let elicitation_request_id = RequestId::Integer(0);
 								    let mut expected_changes = HashMap::new();
 								    expected_changes.insert(
 								        test_file.as_path().to_path_buf(),
 								        FileChange::Update {
 								            unified_diff: "@@ -1 +1 @@\n-original content\n+modified content\n".to_string(),
 								            move_path: None,
 								        },
 								    );
 								    let expected_elicitation_request = create_expected_patch_approval_elicitation_request(
 								        elicitation_request_id.clone(),
 								        expected_changes,
 								        None, // No grant_root expected
 								        None, // No reason expected
 								        codex_request_id.to_string(),
 								        "1".to_string(),
 								    )?;
 								    assert_eq!(expected_elicitation_request, elicitation_request);
 								    // Accept the patch approval request by responding to the elicitation
 								    mcp_process
 								        .send_response(
 								            elicitation_request_id,
 								            serde_json::to_value(PatchApprovalResponse {
 								                decision: ReviewDecision::Approved,
 								            })?,
 								        )
 								        .await?;
 								    // Verify the original `codex` tool call completes
 								    let codex_response = timeout(
 								        DEFAULT_READ_TIMEOUT,
 								        mcp_process.read_stream_until_response_message(RequestId::Integer(codex_request_id)),
 								    )
 								    .await??;
 								    assert_eq!(
 								        JSONRPCResponse {
 								            jsonrpc: JSONRPC_VERSION.into(),
 								            id: RequestId::Integer(codex_request_id),
 								            result: json!({
 								                "content": [
 								                    {
 								                        "text": "Patch has been applied successfully!",
 								                        "type": "text"
 								                    }
 								                ]
 								            }),
 								        },
 								        codex_response
 								    );
 								    let file_contents = std::fs::read_to_string(test_file.as_path())?;
 								    assert_eq!(file_contents, "modified content\n");
 								    Ok(())
 								}
-												Add support for custom base instructions (#1645)

Allows providing custom instructions file as a config parameter and
custom instruction text via MCP tool call.
											
										
										
											2025-07-22 09:42:22 -07:00
+								#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 								async fn test_codex_tool_passes_base_instructions() {
-												make tests pass cleanly in sandbox (#4067)

This changes the reqwest client used in tests to be sandbox-friendly,
and skips a bunch of other tests that don't work inside the
sandbox/without network.
											
										
										
											2025-09-25 13:11:14 -07:00
+								    skip_if_no_network!();
-												Add support for custom base instructions (#1645)

Allows providing custom instructions file as a config parameter and
custom instruction text via MCP tool call.
											
										
										
											2025-07-22 09:42:22 -07:00
 								    // Apparently `#[tokio::test]` must return `()`, so we create a helper
 								    // function that returns `Result` so we can use `?` in favor of `unwrap`.
 								    if let Err(err) = codex_tool_passes_base_instructions().await {
 								        panic!("failure: {err}");
 								    }
 								}
 								async fn codex_tool_passes_base_instructions() -> anyhow::Result<()> {
-												Added `allow-expect-in-tests` / `allow-unwrap-in-tests` (#2328)

This PR:
* Added the clippy.toml to configure allowable expect / unwrap usage in
tests
* Removed as many expect/allow lines as possible from tests
* moved a bunch of allows to expects where possible

Note: in integration tests, non `#[test]` helper functions are not
covered by this so we had to leave a few lingering `expect(expect_used`
checks around
											
										
										
											2025-08-14 17:59:01 -07:00
+								    #![expect(clippy::unwrap_used)]
-												Add support for custom base instructions (#1645)

Allows providing custom instructions file as a config parameter and
custom instruction text via MCP tool call.
											
										
										
											2025-07-22 09:42:22 -07:00
 								    let server =
 								        create_mock_chat_completions_server(vec![create_final_assistant_message_sse_response(
 								            "Enjoy!",
 								        )?])
 								        .await;
 								    // Run `codex mcp` with a specific config.toml.
 								    let codex_home = TempDir::new()?;
 								    create_config_toml(codex_home.path(), &server.uri())?;
 								    let mut mcp_process = McpProcess::new(codex_home.path()).await?;
 								    timeout(DEFAULT_READ_TIMEOUT, mcp_process.initialize()).await??;
 								    // Send a "codex" tool request, which should hit the completions endpoint.
 								    let codex_request_id = mcp_process
 								        .send_codex_tool_call(CodexToolCallParam {
 								            prompt: "How are you?".to_string(),
 								            base_instructions: Some("You are a helpful assistant.".to_string()),
-												[codex] add developer instructions (#5897)

we are using developer instructions for code reviews, we need to pass
them in cli as well.
											
										
										
											2025-10-30 11:18:31 -07:00
+								            developer_instructions: Some("Foreshadow upcoming tool calls.".to_string()),
-												Add support for custom base instructions (#1645)

Allows providing custom instructions file as a config parameter and
custom instruction text via MCP tool call.
											
										
										
											2025-07-22 09:42:22 -07:00
+								            ..Default::default()
 								        })
 								        .await?;
 								    let codex_response = timeout(
 								        DEFAULT_READ_TIMEOUT,
 								        mcp_process.read_stream_until_response_message(RequestId::Integer(codex_request_id)),
 								    )
 								    .await??;
 								    assert_eq!(
 								        JSONRPCResponse {
 								            jsonrpc: JSONRPC_VERSION.into(),
 								            id: RequestId::Integer(codex_request_id),
 								            result: json!({
 								                "content": [
 								                    {
 								                        "text": "Enjoy!",
 								                        "type": "text"
 								                    }
 								                ]
 								            }),
 								        },
 								        codex_response
 								    );
 								    let requests = server.received_requests().await.unwrap();
-												[codex] add developer instructions (#5897)

we are using developer instructions for code reviews, we need to pass
them in cli as well.
											
										
										
											2025-10-30 11:18:31 -07:00
+								    let request = requests[0].body_json::<serde_json::Value>()?;
-												Add support for custom base instructions (#1645)

Allows providing custom instructions file as a config parameter and
custom instruction text via MCP tool call.
											
										
										
											2025-07-22 09:42:22 -07:00
+								    let instructions = request["messages"][0]["content"].as_str().unwrap();
 								    assert!(instructions.starts_with("You are a helpful assistant."));
-												[codex] add developer instructions (#5897)

we are using developer instructions for code reviews, we need to pass
them in cli as well.
											
										
										
											2025-10-30 11:18:31 -07:00
+								    let developer_msg = request["messages"]
 								        .as_array()
 								        .and_then(|messages| {
 								            messages
 								                .iter()
 								                .find(|msg| msg.get("role").and_then(|role| role.as_str()) == Some("developer"))
 								        })
 								        .unwrap();
 								    let developer_content = developer_msg
 								        .get("content")
 								        .and_then(|value| value.as_str())
 								        .unwrap();
 								    assert!(
 								        !developer_content.contains('<'),
 								        "expected developer instructions without XML tags, got `{developer_content}`"
 								    );
 								    assert_eq!(developer_content, "Foreshadow upcoming tool calls.");
-												Add support for custom base instructions (#1645)

Allows providing custom instructions file as a config parameter and
custom instruction text via MCP tool call.
											
										
										
											2025-07-22 09:42:22 -07:00
+								    Ok(())
 								}
-												Add an elicitation for approve patch and refactor tool calls (#1642)

1. Added an elicitation for `approve-patch` which is very similar to
`approve-exec`.
2. Extracted both elicitations to their own files to prevent
`codex_tool_runner` from blowing up in size.
											
										
										
											2025-07-21 23:58:41 -07:00
+								fn create_expected_patch_approval_elicitation_request(
 								    elicitation_request_id: RequestId,
 								    changes: HashMap<PathBuf, FileChange>,
 								    grant_root: Option<PathBuf>,
 								    reason: Option<String>,
 								    codex_mcp_tool_call_id: String,
 								    codex_event_id: String,
 								) -> anyhow::Result<JSONRPCRequest> {
 								    let mut message_lines = Vec::new();
 								    if let Some(r) = &reason {
 								        message_lines.push(r.clone());
 								    }
 								    message_lines.push("Allow Codex to apply proposed code changes?".to_string());
 								    Ok(JSONRPCRequest {
 								        jsonrpc: JSONRPC_VERSION.into(),
 								        id: elicitation_request_id,
 								        method: ElicitRequest::METHOD.to_string(),
 								        params: Some(serde_json::to_value(&PatchApprovalElicitRequestParams {
 								            message: message_lines.join("\n"),
 								            requested_schema: ElicitRequestParamsRequestedSchema {
 								                r#type: "object".to_string(),
 								                properties: json!({}),
 								                required: None,
 								            },
 								            codex_elicitation: "patch-approval".to_string(),
 								            codex_mcp_tool_call_id,
 								            codex_event_id,
 								            codex_reason: reason,
 								            codex_grant_root: grant_root,
 								            codex_changes: changes,
-												Add call_id to patch approvals and elicitations (#1660)

Builds on https://github.com/openai/codex/pull/1659 and adds call_id to
a few more places for the same reason.
											
										
										
											2025-07-23 12:55:35 -07:00
+								            codex_call_id: "call1234".to_string(),
-												Add an elicitation for approve patch and refactor tool calls (#1642)

1. Added an elicitation for `approve-patch` which is very similar to
`approve-exec`.
2. Extracted both elicitations to their own files to prevent
`codex_tool_runner` from blowing up in size.
											
										
										
											2025-07-21 23:58:41 -07:00
+								        })?),
 								    })
 								}
 								/// This handle is used to ensure that the MockServer and TempDir are not dropped while
 								/// the McpProcess is still running.
 								pub struct McpHandle {
 								    pub process: McpProcess,
 								    /// Retain the server for the lifetime of the McpProcess.
 								    #[allow(dead_code)]
 								    server: MockServer,
 								    /// Retain the temporary directory for the lifetime of the McpProcess.
 								    #[allow(dead_code)]
 								    dir: TempDir,
 								}
 								async fn create_mcp_process(responses: Vec<String>) -> anyhow::Result<McpHandle> {
 								    let server = create_mock_chat_completions_server(responses).await;
 								    let codex_home = TempDir::new()?;
 								    create_config_toml(codex_home.path(), &server.uri())?;
 								    let mut mcp_process = McpProcess::new(codex_home.path()).await?;
 								    timeout(DEFAULT_READ_TIMEOUT, mcp_process.initialize()).await??;
 								    Ok(McpHandle {
 								        process: mcp_process,
 								        server,
 								        dir: codex_home,
 								    })
 								}
 								/// Create a Codex config that uses the mock server as the model provider.
 								/// It also uses `approval_policy = "untrusted"` so that we exercise the
 								/// elicitation code path for shell commands.
 								fn create_config_toml(codex_home: &Path, server_uri: &str) -> std::io::Result<()> {
 								    let config_toml = codex_home.join("config.toml");
 								    std::fs::write(
 								        config_toml,
 								        format!(
 								            r#"
 								model = "mock-model"
 								approval_policy = "untrusted"
-												chore: rework tools execution workflow (#5278)

Re-work the tool execution flow. Read `orchestrator.rs` to understand
the structure
											
										
										
											2025-10-20 20:57:37 +01:00
+								sandbox_policy = "workspace-write"
-												Add an elicitation for approve patch and refactor tool calls (#1642)

1. Added an elicitation for `approve-patch` which is very similar to
`approve-exec`.
2. Extracted both elicitations to their own files to prevent
`codex_tool_runner` from blowing up in size.
											
										
										
											2025-07-21 23:58:41 -07:00
 								model_provider = "mock_provider"
 								[model_providers.mock_provider]
 								name = "Mock provider for test"
 								base_url = "{server_uri}/v1"
 								wire_api = "chat"
 								request_max_retries = 0
 								stream_max_retries = 0
 								"#
 								        ),
 								    )
 								}