test: faster test execution in codex-core (#2633)

this dramatically improves time to run `cargo test -p codex-core` (~25x speedup). before: ``` cargo test -p codex-core 35.96s user 68.63s system 19% cpu 8:49.80 total ``` after: ``` cargo test -p codex-core 5.51s user 8.16s system 63% cpu 21.407 total ``` both tests measured "hot", i.e. on a 2nd run with no filesystem changes, to exclude compile times. approach inspired by [Delete Cargo Integration Tests](https://matklad.github.io/2021/02/27/delete-cargo-integration-tests.html), we move all test cases in tests/ into a single suite in order to have a single binary, as there is significant overhead for each test binary executed, and because test execution is only parallelized with a single binary.
2025-08-24 11:10:53 -07:00
parent c6a52d611c
commit 32bbbbad61
56 changed files with 78 additions and 3 deletions
--- a/codex-rs/mcp-server/tests/suite/auth.rs
+++ b/codex-rs/mcp-server/tests/suite/auth.rs
@@ -0,0 +1,142 @@
+use std::path::Path;
+
+use codex_login::login_with_api_key;
+use codex_protocol::mcp_protocol::AuthMode;
+use codex_protocol::mcp_protocol::GetAuthStatusParams;
+use codex_protocol::mcp_protocol::GetAuthStatusResponse;
+use mcp_test_support::McpProcess;
+use mcp_test_support::to_response;
+use mcp_types::JSONRPCResponse;
+use mcp_types::RequestId;
+use pretty_assertions::assert_eq;
+use tempfile::TempDir;
+use tokio::time::timeout;
+
+const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);
+
+// Helper to create a config.toml; mirrors create_conversation.rs
+fn create_config_toml(codex_home: &Path) -> std::io::Result<()> {
+    let config_toml = codex_home.join("config.toml");
+    std::fs::write(
+        config_toml,
+        r#"
+model = "mock-model"
+approval_policy = "never"
+sandbox_mode = "danger-full-access"
+
+model_provider = "mock_provider"
+
+[model_providers.mock_provider]
+name = "Mock provider for test"
+base_url = "http://127.0.0.1:0/v1"
+wire_api = "chat"
+request_max_retries = 0
+stream_max_retries = 0
+"#,
+    )
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn get_auth_status_no_auth() {
+    let codex_home = TempDir::new().unwrap_or_else(|e| panic!("create tempdir: {e}"));
+    create_config_toml(codex_home.path()).expect("write config.toml");
+
+    let mut mcp = McpProcess::new(codex_home.path())
+        .await
+        .expect("spawn mcp process");
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
+        .await
+        .expect("init timeout")
+        .expect("init failed");
+
+    let request_id = mcp
+        .send_get_auth_status_request(GetAuthStatusParams {
+            include_token: Some(true),
+            refresh_token: Some(false),
+        })
+        .await
+        .expect("send getAuthStatus");
+
+    let resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(request_id)),
+    )
+    .await
+    .expect("getAuthStatus timeout")
+    .expect("getAuthStatus response");
+    let status: GetAuthStatusResponse = to_response(resp).expect("deserialize status");
+    assert_eq!(status.auth_method, None, "expected no auth method");
+    assert_eq!(status.auth_token, None, "expected no token");
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn get_auth_status_with_api_key() {
+    let codex_home = TempDir::new().unwrap_or_else(|e| panic!("create tempdir: {e}"));
+    create_config_toml(codex_home.path()).expect("write config.toml");
+    login_with_api_key(codex_home.path(), "sk-test-key").expect("seed api key");
+
+    let mut mcp = McpProcess::new(codex_home.path())
+        .await
+        .expect("spawn mcp process");
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
+        .await
+        .expect("init timeout")
+        .expect("init failed");
+
+    let request_id = mcp
+        .send_get_auth_status_request(GetAuthStatusParams {
+            include_token: Some(true),
+            refresh_token: Some(false),
+        })
+        .await
+        .expect("send getAuthStatus");
+
+    let resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(request_id)),
+    )
+    .await
+    .expect("getAuthStatus timeout")
+    .expect("getAuthStatus response");
+    let status: GetAuthStatusResponse = to_response(resp).expect("deserialize status");
+    assert_eq!(status.auth_method, Some(AuthMode::ApiKey));
+    assert_eq!(status.auth_token, Some("sk-test-key".to_string()));
+    assert_eq!(status.preferred_auth_method, AuthMode::ChatGPT);
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn get_auth_status_with_api_key_no_include_token() {
+    let codex_home = TempDir::new().unwrap_or_else(|e| panic!("create tempdir: {e}"));
+    create_config_toml(codex_home.path()).expect("write config.toml");
+    login_with_api_key(codex_home.path(), "sk-test-key").expect("seed api key");
+
+    let mut mcp = McpProcess::new(codex_home.path())
+        .await
+        .expect("spawn mcp process");
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
+        .await
+        .expect("init timeout")
+        .expect("init failed");
+
+    // Build params via struct so None field is omitted in wire JSON.
+    let params = GetAuthStatusParams {
+        include_token: None,
+        refresh_token: Some(false),
+    };
+    let request_id = mcp
+        .send_get_auth_status_request(params)
+        .await
+        .expect("send getAuthStatus");
+
+    let resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(request_id)),
+    )
+    .await
+    .expect("getAuthStatus timeout")
+    .expect("getAuthStatus response");
+    let status: GetAuthStatusResponse = to_response(resp).expect("deserialize status");
+    assert_eq!(status.auth_method, Some(AuthMode::ApiKey));
+    assert!(status.auth_token.is_none(), "token must be omitted");
+    assert_eq!(status.preferred_auth_method, AuthMode::ChatGPT);
+}
--- a/codex-rs/mcp-server/tests/suite/codex_message_processor_flow.rs
+++ b/codex-rs/mcp-server/tests/suite/codex_message_processor_flow.rs
@@ -0,0 +1,370 @@
+use std::path::Path;
+
+use codex_core::protocol::AskForApproval;
+use codex_core::protocol::SandboxPolicy;
+use codex_core::protocol_config_types::ReasoningEffort;
+use codex_core::protocol_config_types::ReasoningSummary;
+use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
+use codex_protocol::mcp_protocol::AddConversationListenerParams;
+use codex_protocol::mcp_protocol::AddConversationSubscriptionResponse;
+use codex_protocol::mcp_protocol::EXEC_COMMAND_APPROVAL_METHOD;
+use codex_protocol::mcp_protocol::NewConversationParams;
+use codex_protocol::mcp_protocol::NewConversationResponse;
+use codex_protocol::mcp_protocol::RemoveConversationListenerParams;
+use codex_protocol::mcp_protocol::RemoveConversationSubscriptionResponse;
+use codex_protocol::mcp_protocol::SendUserMessageParams;
+use codex_protocol::mcp_protocol::SendUserMessageResponse;
+use codex_protocol::mcp_protocol::SendUserTurnParams;
+use codex_protocol::mcp_protocol::SendUserTurnResponse;
+use mcp_test_support::McpProcess;
+use mcp_test_support::create_final_assistant_message_sse_response;
+use mcp_test_support::create_mock_chat_completions_server;
+use mcp_test_support::create_shell_sse_response;
+use mcp_test_support::to_response;
+use mcp_types::JSONRPCNotification;
+use mcp_types::JSONRPCResponse;
+use mcp_types::RequestId;
+use pretty_assertions::assert_eq;
+use std::env;
+use tempfile::TempDir;
+use tokio::time::timeout;
+
+const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+async fn test_codex_jsonrpc_conversation_flow() {
+    if env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
+        println!(
+            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
+        );
+        return;
+    }
+
+    let tmp = TempDir::new().expect("tmp dir");
+    // Temporary Codex home with config pointing at the mock server.
+    let codex_home = tmp.path().join("codex_home");
+    std::fs::create_dir(&codex_home).expect("create codex home dir");
+    let working_directory = tmp.path().join("workdir");
+    std::fs::create_dir(&working_directory).expect("create working directory");
+
+    // Create a mock model server that immediately ends each turn.
+    // Two turns are expected: initial session configure + one user message.
+    let responses = vec![
+        create_shell_sse_response(
+            vec!["ls".to_string()],
+            Some(&working_directory),
+            Some(5000),
+            "call1234",
+        )
+        .expect("create shell sse response"),
+        create_final_assistant_message_sse_response("Enjoy your new git repo!")
+            .expect("create final assistant message"),
+    ];
+    let server = create_mock_chat_completions_server(responses).await;
+    create_config_toml(&codex_home, &server.uri()).expect("write config");
+
+    // Start MCP server and initialize.
+    let mut mcp = McpProcess::new(&codex_home).await.expect("spawn mcp");
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
+        .await
+        .expect("init timeout")
+        .expect("init error");
+
+    // 1) newConversation
+    let new_conv_id = mcp
+        .send_new_conversation_request(NewConversationParams {
+            cwd: Some(working_directory.to_string_lossy().into_owned()),
+            ..Default::default()
+        })
+        .await
+        .expect("send newConversation");
+    let new_conv_resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(new_conv_id)),
+    )
+    .await
+    .expect("newConversation timeout")
+    .expect("newConversation resp");
+    let new_conv_resp = to_response::<NewConversationResponse>(new_conv_resp)
+        .expect("deserialize newConversation response");
+    let NewConversationResponse {
+        conversation_id,
+        model,
+    } = new_conv_resp;
+    assert_eq!(model, "mock-model");
+
+    // 2) addConversationListener
+    let add_listener_id = mcp
+        .send_add_conversation_listener_request(AddConversationListenerParams { conversation_id })
+        .await
+        .expect("send addConversationListener");
+    let add_listener_resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(add_listener_id)),
+    )
+    .await
+    .expect("addConversationListener timeout")
+    .expect("addConversationListener resp");
+    let AddConversationSubscriptionResponse { subscription_id } =
+        to_response::<AddConversationSubscriptionResponse>(add_listener_resp)
+            .expect("deserialize addConversationListener response");
+
+    // 3) sendUserMessage (should trigger notifications; we only validate an OK response)
+    let send_user_id = mcp
+        .send_send_user_message_request(SendUserMessageParams {
+            conversation_id,
+            items: vec![codex_protocol::mcp_protocol::InputItem::Text {
+                text: "text".to_string(),
+            }],
+        })
+        .await
+        .expect("send sendUserMessage");
+    let send_user_resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(send_user_id)),
+    )
+    .await
+    .expect("sendUserMessage timeout")
+    .expect("sendUserMessage resp");
+    let SendUserMessageResponse {} = to_response::<SendUserMessageResponse>(send_user_resp)
+        .expect("deserialize sendUserMessage response");
+
+    // Verify the task_finished notification is received.
+    // Note this also ensures that the final request to the server was made.
+    let task_finished_notification: JSONRPCNotification = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_notification_message("codex/event/task_complete"),
+    )
+    .await
+    .expect("task_finished_notification timeout")
+    .expect("task_finished_notification resp");
+    let serde_json::Value::Object(map) = task_finished_notification
+        .params
+        .expect("notification should have params")
+    else {
+        panic!("task_finished_notification should have params");
+    };
+    assert_eq!(
+        map.get("conversationId")
+            .expect("should have conversationId"),
+        &serde_json::Value::String(conversation_id.to_string())
+    );
+
+    // 4) removeConversationListener
+    let remove_listener_id = mcp
+        .send_remove_conversation_listener_request(RemoveConversationListenerParams {
+            subscription_id,
+        })
+        .await
+        .expect("send removeConversationListener");
+    let remove_listener_resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(remove_listener_id)),
+    )
+    .await
+    .expect("removeConversationListener timeout")
+    .expect("removeConversationListener resp");
+    let RemoveConversationSubscriptionResponse {} =
+        to_response(remove_listener_resp).expect("deserialize removeConversationListener response");
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+async fn test_send_user_turn_changes_approval_policy_behavior() {
+    if env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
+        println!(
+            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
+        );
+        return;
+    }
+
+    let tmp = TempDir::new().expect("tmp dir");
+    let codex_home = tmp.path().join("codex_home");
+    std::fs::create_dir(&codex_home).expect("create codex home dir");
+    let working_directory = tmp.path().join("workdir");
+    std::fs::create_dir(&working_directory).expect("create working directory");
+
+    // Mock server will request a python shell call for the first and second turn, then finish.
+    let responses = vec![
+        create_shell_sse_response(
+            vec![
+                "python3".to_string(),
+                "-c".to_string(),
+                "print(42)".to_string(),
+            ],
+            Some(&working_directory),
+            Some(5000),
+            "call1",
+        )
+        .expect("create first shell sse response"),
+        create_final_assistant_message_sse_response("done 1")
+            .expect("create final assistant message 1"),
+        create_shell_sse_response(
+            vec![
+                "python3".to_string(),
+                "-c".to_string(),
+                "print(42)".to_string(),
+            ],
+            Some(&working_directory),
+            Some(5000),
+            "call2",
+        )
+        .expect("create second shell sse response"),
+        create_final_assistant_message_sse_response("done 2")
+            .expect("create final assistant message 2"),
+    ];
+    let server = create_mock_chat_completions_server(responses).await;
+    create_config_toml(&codex_home, &server.uri()).expect("write config");
+
+    // Start MCP server and initialize.
+    let mut mcp = McpProcess::new(&codex_home).await.expect("spawn mcp");
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
+        .await
+        .expect("init timeout")
+        .expect("init error");
+
+    // 1) Start conversation with approval_policy=untrusted
+    let new_conv_id = mcp
+        .send_new_conversation_request(NewConversationParams {
+            cwd: Some(working_directory.to_string_lossy().into_owned()),
+            ..Default::default()
+        })
+        .await
+        .expect("send newConversation");
+    let new_conv_resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(new_conv_id)),
+    )
+    .await
+    .expect("newConversation timeout")
+    .expect("newConversation resp");
+    let NewConversationResponse {
+        conversation_id, ..
+    } = to_response::<NewConversationResponse>(new_conv_resp)
+        .expect("deserialize newConversation response");
+
+    // 2) addConversationListener
+    let add_listener_id = mcp
+        .send_add_conversation_listener_request(AddConversationListenerParams { conversation_id })
+        .await
+        .expect("send addConversationListener");
+    let _: AddConversationSubscriptionResponse =
+        to_response::<AddConversationSubscriptionResponse>(
+            timeout(
+                DEFAULT_READ_TIMEOUT,
+                mcp.read_stream_until_response_message(RequestId::Integer(add_listener_id)),
+            )
+            .await
+            .expect("addConversationListener timeout")
+            .expect("addConversationListener resp"),
+        )
+        .expect("deserialize addConversationListener response");
+
+    // 3) sendUserMessage triggers a shell call; approval policy is Untrusted so we should get an elicitation
+    let send_user_id = mcp
+        .send_send_user_message_request(SendUserMessageParams {
+            conversation_id,
+            items: vec![codex_protocol::mcp_protocol::InputItem::Text {
+                text: "run python".to_string(),
+            }],
+        })
+        .await
+        .expect("send sendUserMessage");
+    let _send_user_resp: SendUserMessageResponse = to_response::<SendUserMessageResponse>(
+        timeout(
+            DEFAULT_READ_TIMEOUT,
+            mcp.read_stream_until_response_message(RequestId::Integer(send_user_id)),
+        )
+        .await
+        .expect("sendUserMessage timeout")
+        .expect("sendUserMessage resp"),
+    )
+    .expect("deserialize sendUserMessage response");
+
+    // Expect an ExecCommandApproval request (elicitation)
+    let request = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_request_message(),
+    )
+    .await
+    .expect("waiting for exec approval request timeout")
+    .expect("exec approval request");
+    assert_eq!(request.method, EXEC_COMMAND_APPROVAL_METHOD);
+
+    // Approve so the first turn can complete
+    mcp.send_response(
+        request.id,
+        serde_json::json!({ "decision": codex_core::protocol::ReviewDecision::Approved }),
+    )
+    .await
+    .expect("send approval response");
+
+    // Wait for first TaskComplete
+    let _ = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_notification_message("codex/event/task_complete"),
+    )
+    .await
+    .expect("task_complete 1 timeout")
+    .expect("task_complete 1 notification");
+
+    // 4) sendUserTurn with approval_policy=never should run without elicitation
+    let send_turn_id = mcp
+        .send_send_user_turn_request(SendUserTurnParams {
+            conversation_id,
+            items: vec![codex_protocol::mcp_protocol::InputItem::Text {
+                text: "run python again".to_string(),
+            }],
+            cwd: working_directory.clone(),
+            approval_policy: AskForApproval::Never,
+            sandbox_policy: SandboxPolicy::new_read_only_policy(),
+            model: "mock-model".to_string(),
+            effort: ReasoningEffort::Medium,
+            summary: ReasoningSummary::Auto,
+        })
+        .await
+        .expect("send sendUserTurn");
+    // Acknowledge sendUserTurn
+    let _send_turn_resp: SendUserTurnResponse = to_response::<SendUserTurnResponse>(
+        timeout(
+            DEFAULT_READ_TIMEOUT,
+            mcp.read_stream_until_response_message(RequestId::Integer(send_turn_id)),
+        )
+        .await
+        .expect("sendUserTurn timeout")
+        .expect("sendUserTurn resp"),
+    )
+    .expect("deserialize sendUserTurn response");
+
+    // Ensure we do NOT receive an ExecCommandApproval request before the task completes.
+    // If any Request is seen while waiting for task_complete, the helper will error and the test fails.
+    let _ = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_notification_message("codex/event/task_complete"),
+    )
+    .await
+    .expect("task_complete 2 timeout")
+    .expect("task_complete 2 notification");
+}
+
+// Helper: minimal config.toml pointing at mock provider.
+fn create_config_toml(codex_home: &Path, server_uri: &str) -> std::io::Result<()> {
+    let config_toml = codex_home.join("config.toml");
+    std::fs::write(
+        config_toml,
+        format!(
+            r#"
+model = "mock-model"
+approval_policy = "untrusted"
+
+model_provider = "mock_provider"
+
+[model_providers.mock_provider]
+name = "Mock provider for test"
+base_url = "{server_uri}/v1"
+wire_api = "chat"
+request_max_retries = 0
+stream_max_retries = 0
+"#
+        ),
+    )
+}
--- a/codex-rs/mcp-server/tests/suite/codex_tool.rs
+++ b/codex-rs/mcp-server/tests/suite/codex_tool.rs
@@ -0,0 +1,452 @@
+use std::collections::HashMap;
+use std::env;
+use std::path::Path;
+use std::path::PathBuf;
+
+use codex_core::protocol::FileChange;
+use codex_core::protocol::ReviewDecision;
+use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
+use codex_mcp_server::CodexToolCallParam;
+use codex_mcp_server::ExecApprovalElicitRequestParams;
+use codex_mcp_server::ExecApprovalResponse;
+use codex_mcp_server::PatchApprovalElicitRequestParams;
+use codex_mcp_server::PatchApprovalResponse;
+use mcp_types::ElicitRequest;
+use mcp_types::ElicitRequestParamsRequestedSchema;
+use mcp_types::JSONRPC_VERSION;
+use mcp_types::JSONRPCRequest;
+use mcp_types::JSONRPCResponse;
+use mcp_types::ModelContextProtocolRequest;
+use mcp_types::RequestId;
+use pretty_assertions::assert_eq;
+use serde_json::json;
+use tempfile::TempDir;
+use tokio::time::timeout;
+use wiremock::MockServer;
+
+use mcp_test_support::McpProcess;
+use mcp_test_support::create_apply_patch_sse_response;
+use mcp_test_support::create_final_assistant_message_sse_response;
+use mcp_test_support::create_mock_chat_completions_server;
+use mcp_test_support::create_shell_sse_response;
+
+const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);
+
+/// Test that a shell command that is not on the "trusted" list triggers an
+/// elicitation request to the MCP and that sending the approval runs the
+/// command, as expected.
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+async fn test_shell_command_approval_triggers_elicitation() {
+    if env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
+        println!(
+            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
+        );
+        return;
+    }
+
+    // Apparently `#[tokio::test]` must return `()`, so we create a helper
+    // function that returns `Result` so we can use `?` in favor of `unwrap`.
+    if let Err(err) = shell_command_approval_triggers_elicitation().await {
+        panic!("failure: {err}");
+    }
+}
+
+async fn shell_command_approval_triggers_elicitation() -> anyhow::Result<()> {
+    // We use `git init` because it will not be on the "trusted" list.
+    let shell_command = vec!["git".to_string(), "init".to_string()];
+    let workdir_for_shell_function_call = TempDir::new()?;
+
+    let McpHandle {
+        process: mut mcp_process,
+        server: _server,
+        dir: _dir,
+    } = create_mcp_process(vec![
+        create_shell_sse_response(
+            shell_command.clone(),
+            Some(workdir_for_shell_function_call.path()),
+            Some(5_000),
+            "call1234",
+        )?,
+        create_final_assistant_message_sse_response("Enjoy your new git repo!")?,
+    ])
+    .await?;
+
+    // Send a "codex" tool request, which should hit the completions endpoint.
+    // In turn, it should reply with a tool call, which the MCP should forward
+    // as an elicitation.
+    let codex_request_id = mcp_process
+        .send_codex_tool_call(CodexToolCallParam {
+            prompt: "run `git init`".to_string(),
+            ..Default::default()
+        })
+        .await?;
+    let elicitation_request = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp_process.read_stream_until_request_message(),
+    )
+    .await??;
+
+    let elicitation_request_id = elicitation_request.id.clone();
+    let params = serde_json::from_value::<ExecApprovalElicitRequestParams>(
+        elicitation_request
+            .params
+            .clone()
+            .ok_or_else(|| anyhow::anyhow!("elicitation_request.params must be set"))?,
+    )?;
+    let expected_elicitation_request = create_expected_elicitation_request(
+        elicitation_request_id.clone(),
+        shell_command.clone(),
+        workdir_for_shell_function_call.path(),
+        codex_request_id.to_string(),
+        params.codex_event_id.clone(),
+    )?;
+    assert_eq!(expected_elicitation_request, elicitation_request);
+
+    // Accept the `git init` request by responding to the elicitation.
+    mcp_process
+        .send_response(
+            elicitation_request_id,
+            serde_json::to_value(ExecApprovalResponse {
+                decision: ReviewDecision::Approved,
+            })?,
+        )
+        .await?;
+
+    // Verify task_complete notification arrives before the tool call completes.
+    #[expect(clippy::expect_used)]
+    let _task_complete = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp_process.read_stream_until_legacy_task_complete_notification(),
+    )
+    .await
+    .expect("task_complete_notification timeout")
+    .expect("task_complete_notification resp");
+
+    // Verify the original `codex` tool call completes and that `git init` ran
+    // successfully.
+    let codex_response = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp_process.read_stream_until_response_message(RequestId::Integer(codex_request_id)),
+    )
+    .await??;
+    assert_eq!(
+        JSONRPCResponse {
+            jsonrpc: JSONRPC_VERSION.into(),
+            id: RequestId::Integer(codex_request_id),
+            result: json!({
+                "content": [
+                    {
+                        "text": "Enjoy your new git repo!",
+                        "type": "text"
+                    }
+                ]
+            }),
+        },
+        codex_response
+    );
+
+    assert!(
+        workdir_for_shell_function_call.path().join(".git").is_dir(),
+        ".git folder should have been created"
+    );
+
+    Ok(())
+}
+
+fn create_expected_elicitation_request(
+    elicitation_request_id: RequestId,
+    command: Vec<String>,
+    workdir: &Path,
+    codex_mcp_tool_call_id: String,
+    codex_event_id: String,
+) -> anyhow::Result<JSONRPCRequest> {
+    let expected_message = format!(
+        "Allow Codex to run `{}` in `{}`?",
+        shlex::try_join(command.iter().map(|s| s.as_ref()))?,
+        workdir.to_string_lossy()
+    );
+    Ok(JSONRPCRequest {
+        jsonrpc: JSONRPC_VERSION.into(),
+        id: elicitation_request_id,
+        method: ElicitRequest::METHOD.to_string(),
+        params: Some(serde_json::to_value(&ExecApprovalElicitRequestParams {
+            message: expected_message,
+            requested_schema: ElicitRequestParamsRequestedSchema {
+                r#type: "object".to_string(),
+                properties: json!({}),
+                required: None,
+            },
+            codex_elicitation: "exec-approval".to_string(),
+            codex_mcp_tool_call_id,
+            codex_event_id,
+            codex_command: command,
+            codex_cwd: workdir.to_path_buf(),
+            codex_call_id: "call1234".to_string(),
+        })?),
+    })
+}
+
+/// Test that patch approval triggers an elicitation request to the MCP and that
+/// sending the approval applies the patch, as expected.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn test_patch_approval_triggers_elicitation() {
+    if env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
+        println!(
+            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
+        );
+        return;
+    }
+
+    if let Err(err) = patch_approval_triggers_elicitation().await {
+        panic!("failure: {err}");
+    }
+}
+
+async fn patch_approval_triggers_elicitation() -> anyhow::Result<()> {
+    let cwd = TempDir::new()?;
+    let test_file = cwd.path().join("destination_file.txt");
+    std::fs::write(&test_file, "original content\n")?;
+
+    let patch_content = format!(
+        "*** Begin Patch\n*** Update File: {}\n-original content\n+modified content\n*** End Patch",
+        test_file.as_path().to_string_lossy()
+    );
+
+    let McpHandle {
+        process: mut mcp_process,
+        server: _server,
+        dir: _dir,
+    } = create_mcp_process(vec![
+        create_apply_patch_sse_response(&patch_content, "call1234")?,
+        create_final_assistant_message_sse_response("Patch has been applied successfully!")?,
+    ])
+    .await?;
+
+    // Send a "codex" tool request that will trigger the apply_patch command
+    let codex_request_id = mcp_process
+        .send_codex_tool_call(CodexToolCallParam {
+            cwd: Some(cwd.path().to_string_lossy().to_string()),
+            prompt: "please modify the test file".to_string(),
+            ..Default::default()
+        })
+        .await?;
+    let elicitation_request = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp_process.read_stream_until_request_message(),
+    )
+    .await??;
+
+    let elicitation_request_id = RequestId::Integer(0);
+
+    let mut expected_changes = HashMap::new();
+    expected_changes.insert(
+        test_file.as_path().to_path_buf(),
+        FileChange::Update {
+            unified_diff: "@@ -1 +1 @@\n-original content\n+modified content\n".to_string(),
+            move_path: None,
+        },
+    );
+
+    let expected_elicitation_request = create_expected_patch_approval_elicitation_request(
+        elicitation_request_id.clone(),
+        expected_changes,
+        None, // No grant_root expected
+        None, // No reason expected
+        codex_request_id.to_string(),
+        "1".to_string(),
+    )?;
+    assert_eq!(expected_elicitation_request, elicitation_request);
+
+    // Accept the patch approval request by responding to the elicitation
+    mcp_process
+        .send_response(
+            elicitation_request_id,
+            serde_json::to_value(PatchApprovalResponse {
+                decision: ReviewDecision::Approved,
+            })?,
+        )
+        .await?;
+
+    // Verify the original `codex` tool call completes
+    let codex_response = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp_process.read_stream_until_response_message(RequestId::Integer(codex_request_id)),
+    )
+    .await??;
+    assert_eq!(
+        JSONRPCResponse {
+            jsonrpc: JSONRPC_VERSION.into(),
+            id: RequestId::Integer(codex_request_id),
+            result: json!({
+                "content": [
+                    {
+                        "text": "Patch has been applied successfully!",
+                        "type": "text"
+                    }
+                ]
+            }),
+        },
+        codex_response
+    );
+
+    let file_contents = std::fs::read_to_string(test_file.as_path())?;
+    assert_eq!(file_contents, "modified content\n");
+
+    Ok(())
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn test_codex_tool_passes_base_instructions() {
+    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
+        println!(
+            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
+        );
+        return;
+    }
+
+    // Apparently `#[tokio::test]` must return `()`, so we create a helper
+    // function that returns `Result` so we can use `?` in favor of `unwrap`.
+    if let Err(err) = codex_tool_passes_base_instructions().await {
+        panic!("failure: {err}");
+    }
+}
+
+async fn codex_tool_passes_base_instructions() -> anyhow::Result<()> {
+    #![expect(clippy::unwrap_used)]
+
+    let server =
+        create_mock_chat_completions_server(vec![create_final_assistant_message_sse_response(
+            "Enjoy!",
+        )?])
+        .await;
+
+    // Run `codex mcp` with a specific config.toml.
+    let codex_home = TempDir::new()?;
+    create_config_toml(codex_home.path(), &server.uri())?;
+    let mut mcp_process = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp_process.initialize()).await??;
+
+    // Send a "codex" tool request, which should hit the completions endpoint.
+    let codex_request_id = mcp_process
+        .send_codex_tool_call(CodexToolCallParam {
+            prompt: "How are you?".to_string(),
+            base_instructions: Some("You are a helpful assistant.".to_string()),
+            ..Default::default()
+        })
+        .await?;
+
+    let codex_response = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp_process.read_stream_until_response_message(RequestId::Integer(codex_request_id)),
+    )
+    .await??;
+    assert_eq!(
+        JSONRPCResponse {
+            jsonrpc: JSONRPC_VERSION.into(),
+            id: RequestId::Integer(codex_request_id),
+            result: json!({
+                "content": [
+                    {
+                        "text": "Enjoy!",
+                        "type": "text"
+                    }
+                ]
+            }),
+        },
+        codex_response
+    );
+
+    let requests = server.received_requests().await.unwrap();
+    let request = requests[0].body_json::<serde_json::Value>().unwrap();
+    let instructions = request["messages"][0]["content"].as_str().unwrap();
+    assert!(instructions.starts_with("You are a helpful assistant."));
+
+    Ok(())
+}
+
+fn create_expected_patch_approval_elicitation_request(
+    elicitation_request_id: RequestId,
+    changes: HashMap<PathBuf, FileChange>,
+    grant_root: Option<PathBuf>,
+    reason: Option<String>,
+    codex_mcp_tool_call_id: String,
+    codex_event_id: String,
+) -> anyhow::Result<JSONRPCRequest> {
+    let mut message_lines = Vec::new();
+    if let Some(r) = &reason {
+        message_lines.push(r.clone());
+    }
+    message_lines.push("Allow Codex to apply proposed code changes?".to_string());
+
+    Ok(JSONRPCRequest {
+        jsonrpc: JSONRPC_VERSION.into(),
+        id: elicitation_request_id,
+        method: ElicitRequest::METHOD.to_string(),
+        params: Some(serde_json::to_value(&PatchApprovalElicitRequestParams {
+            message: message_lines.join("\n"),
+            requested_schema: ElicitRequestParamsRequestedSchema {
+                r#type: "object".to_string(),
+                properties: json!({}),
+                required: None,
+            },
+            codex_elicitation: "patch-approval".to_string(),
+            codex_mcp_tool_call_id,
+            codex_event_id,
+            codex_reason: reason,
+            codex_grant_root: grant_root,
+            codex_changes: changes,
+            codex_call_id: "call1234".to_string(),
+        })?),
+    })
+}
+
+/// This handle is used to ensure that the MockServer and TempDir are not dropped while
+/// the McpProcess is still running.
+pub struct McpHandle {
+    pub process: McpProcess,
+    /// Retain the server for the lifetime of the McpProcess.
+    #[allow(dead_code)]
+    server: MockServer,
+    /// Retain the temporary directory for the lifetime of the McpProcess.
+    #[allow(dead_code)]
+    dir: TempDir,
+}
+
+async fn create_mcp_process(responses: Vec<String>) -> anyhow::Result<McpHandle> {
+    let server = create_mock_chat_completions_server(responses).await;
+    let codex_home = TempDir::new()?;
+    create_config_toml(codex_home.path(), &server.uri())?;
+    let mut mcp_process = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp_process.initialize()).await??;
+    Ok(McpHandle {
+        process: mcp_process,
+        server,
+        dir: codex_home,
+    })
+}
+
+/// Create a Codex config that uses the mock server as the model provider.
+/// It also uses `approval_policy = "untrusted"` so that we exercise the
+/// elicitation code path for shell commands.
+fn create_config_toml(codex_home: &Path, server_uri: &str) -> std::io::Result<()> {
+    let config_toml = codex_home.join("config.toml");
+    std::fs::write(
+        config_toml,
+        format!(
+            r#"
+model = "mock-model"
+approval_policy = "untrusted"
+sandbox_policy = "read-only"
+
+model_provider = "mock_provider"
+
+[model_providers.mock_provider]
+name = "Mock provider for test"
+base_url = "{server_uri}/v1"
+wire_api = "chat"
+request_max_retries = 0
+stream_max_retries = 0
+"#
+        ),
+    )
+}
--- a/codex-rs/mcp-server/tests/suite/create_conversation.rs
+++ b/codex-rs/mcp-server/tests/suite/create_conversation.rs
@@ -0,0 +1,155 @@
+use std::path::Path;
+
+use codex_protocol::mcp_protocol::AddConversationListenerParams;
+use codex_protocol::mcp_protocol::AddConversationSubscriptionResponse;
+use codex_protocol::mcp_protocol::InputItem;
+use codex_protocol::mcp_protocol::NewConversationParams;
+use codex_protocol::mcp_protocol::NewConversationResponse;
+use codex_protocol::mcp_protocol::SendUserMessageParams;
+use codex_protocol::mcp_protocol::SendUserMessageResponse;
+use mcp_test_support::McpProcess;
+use mcp_test_support::create_final_assistant_message_sse_response;
+use mcp_test_support::create_mock_chat_completions_server;
+use mcp_test_support::to_response;
+use mcp_types::JSONRPCResponse;
+use mcp_types::RequestId;
+use pretty_assertions::assert_eq;
+use serde_json::json;
+use tempfile::TempDir;
+use tokio::time::timeout;
+
+const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn test_conversation_create_and_send_message_ok() {
+    // Mock server – we won't strictly rely on it, but provide one to satisfy any model wiring.
+    let responses = vec![
+        create_final_assistant_message_sse_response("Done").expect("build mock assistant message"),
+    ];
+    let server = create_mock_chat_completions_server(responses).await;
+
+    // Temporary Codex home with config pointing at the mock server.
+    let codex_home = TempDir::new().expect("create temp dir");
+    create_config_toml(codex_home.path(), &server.uri()).expect("write config.toml");
+
+    // Start MCP server process and initialize.
+    let mut mcp = McpProcess::new(codex_home.path())
+        .await
+        .expect("spawn mcp process");
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
+        .await
+        .expect("init timeout")
+        .expect("init failed");
+
+    // Create a conversation via the new JSON-RPC API.
+    let new_conv_id = mcp
+        .send_new_conversation_request(NewConversationParams {
+            model: Some("o3".to_string()),
+            ..Default::default()
+        })
+        .await
+        .expect("send newConversation");
+    let new_conv_resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(new_conv_id)),
+    )
+    .await
+    .expect("newConversation timeout")
+    .expect("newConversation resp");
+    let NewConversationResponse {
+        conversation_id,
+        model,
+    } = to_response::<NewConversationResponse>(new_conv_resp)
+        .expect("deserialize newConversation response");
+    assert_eq!(model, "o3");
+
+    // Add a listener so we receive notifications for this conversation (not strictly required for this test).
+    let add_listener_id = mcp
+        .send_add_conversation_listener_request(AddConversationListenerParams { conversation_id })
+        .await
+        .expect("send addConversationListener");
+    let _sub: AddConversationSubscriptionResponse =
+        to_response::<AddConversationSubscriptionResponse>(
+            timeout(
+                DEFAULT_READ_TIMEOUT,
+                mcp.read_stream_until_response_message(RequestId::Integer(add_listener_id)),
+            )
+            .await
+            .expect("addConversationListener timeout")
+            .expect("addConversationListener resp"),
+        )
+        .expect("deserialize addConversationListener response");
+
+    // Now send a user message via the wire API and expect an OK (empty object) result.
+    let send_id = mcp
+        .send_send_user_message_request(SendUserMessageParams {
+            conversation_id,
+            items: vec![InputItem::Text {
+                text: "Hello".to_string(),
+            }],
+        })
+        .await
+        .expect("send sendUserMessage");
+    let send_resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(send_id)),
+    )
+    .await
+    .expect("sendUserMessage timeout")
+    .expect("sendUserMessage resp");
+    let _ok: SendUserMessageResponse = to_response::<SendUserMessageResponse>(send_resp)
+        .expect("deserialize sendUserMessage response");
+
+    // avoid race condition by waiting for the mock server to receive the chat.completions request
+    let deadline = std::time::Instant::now() + DEFAULT_READ_TIMEOUT;
+    loop {
+        let requests = server.received_requests().await.unwrap_or_default();
+        if !requests.is_empty() {
+            break;
+        }
+        if std::time::Instant::now() >= deadline {
+            panic!("mock server did not receive the chat.completions request in time");
+        }
+        tokio::time::sleep(std::time::Duration::from_millis(10)).await;
+    }
+
+    // Verify the outbound request body matches expectations for Chat Completions.
+    let request = &server.received_requests().await.unwrap()[0];
+    let body = request
+        .body_json::<serde_json::Value>()
+        .expect("parse request body as JSON");
+    assert_eq!(body["model"], json!("o3"));
+    assert!(body["stream"].as_bool().unwrap_or(false));
+    let messages = body["messages"]
+        .as_array()
+        .expect("messages should be array");
+    let last = messages.last().expect("at least one message");
+    assert_eq!(last["role"], json!("user"));
+    assert_eq!(last["content"], json!("Hello"));
+
+    drop(server);
+}
+
+// Helper to create a config.toml pointing at the mock model server.
+fn create_config_toml(codex_home: &Path, server_uri: &str) -> std::io::Result<()> {
+    let config_toml = codex_home.join("config.toml");
+    std::fs::write(
+        config_toml,
+        format!(
+            r#"
+model = "mock-model"
+approval_policy = "never"
+sandbox_mode = "danger-full-access"
+
+model_provider = "mock_provider"
+
+[model_providers.mock_provider]
+name = "Mock provider for test"
+base_url = "{server_uri}/v1"
+wire_api = "chat"
+request_max_retries = 0
+stream_max_retries = 0
+"#
+        ),
+    )
+}
--- a/codex-rs/mcp-server/tests/suite/interrupt.rs
+++ b/codex-rs/mcp-server/tests/suite/interrupt.rs
@@ -0,0 +1,164 @@
+#![cfg(unix)]
+// Support code lives in the `mcp_test_support` crate under tests/common.
+
+use std::path::Path;
+
+use codex_core::protocol::TurnAbortReason;
+use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
+use codex_protocol::mcp_protocol::AddConversationListenerParams;
+use codex_protocol::mcp_protocol::InterruptConversationParams;
+use codex_protocol::mcp_protocol::InterruptConversationResponse;
+use codex_protocol::mcp_protocol::NewConversationParams;
+use codex_protocol::mcp_protocol::NewConversationResponse;
+use codex_protocol::mcp_protocol::SendUserMessageParams;
+use codex_protocol::mcp_protocol::SendUserMessageResponse;
+use mcp_types::JSONRPCResponse;
+use mcp_types::RequestId;
+use tempfile::TempDir;
+use tokio::time::timeout;
+
+use mcp_test_support::McpProcess;
+use mcp_test_support::create_mock_chat_completions_server;
+use mcp_test_support::create_shell_sse_response;
+use mcp_test_support::to_response;
+
+const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn test_shell_command_interruption() {
+    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
+        println!(
+            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
+        );
+        return;
+    }
+
+    if let Err(err) = shell_command_interruption().await {
+        panic!("failure: {err}");
+    }
+}
+
+async fn shell_command_interruption() -> anyhow::Result<()> {
+    // Use a cross-platform blocking command. On Windows plain `sleep` is not guaranteed to exist
+    // (MSYS/GNU coreutils may be absent) and the failure causes the tool call to finish immediately,
+    // which triggers a second model request before the test sends the explicit follow-up. That
+    // prematurely consumes the second mocked SSE response and leads to a third POST (panic: no response for 2).
+    // Powershell Start-Sleep is always available on Windows runners. On Unix we keep using `sleep`.
+    #[cfg(target_os = "windows")]
+    let shell_command = vec![
+        "powershell".to_string(),
+        "-Command".to_string(),
+        "Start-Sleep -Seconds 10".to_string(),
+    ];
+    #[cfg(not(target_os = "windows"))]
+    let shell_command = vec!["sleep".to_string(), "10".to_string()];
+
+    let tmp = TempDir::new()?;
+    // Temporary Codex home with config pointing at the mock server.
+    let codex_home = tmp.path().join("codex_home");
+    std::fs::create_dir(&codex_home)?;
+    let working_directory = tmp.path().join("workdir");
+    std::fs::create_dir(&working_directory)?;
+
+    // Create mock server with a single SSE response: the long sleep command
+    let server = create_mock_chat_completions_server(vec![create_shell_sse_response(
+        shell_command.clone(),
+        Some(&working_directory),
+        Some(10_000), // 10 seconds timeout in ms
+        "call_sleep",
+    )?])
+    .await;
+    create_config_toml(&codex_home, server.uri())?;
+
+    // Start MCP server and initialize.
+    let mut mcp = McpProcess::new(&codex_home).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
+
+    // 1) newConversation
+    let new_conv_id = mcp
+        .send_new_conversation_request(NewConversationParams {
+            cwd: Some(working_directory.to_string_lossy().into_owned()),
+            ..Default::default()
+        })
+        .await?;
+    let new_conv_resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(new_conv_id)),
+    )
+    .await??;
+    let new_conv_resp = to_response::<NewConversationResponse>(new_conv_resp)?;
+    let NewConversationResponse {
+        conversation_id, ..
+    } = new_conv_resp;
+
+    // 2) addConversationListener
+    let add_listener_id = mcp
+        .send_add_conversation_listener_request(AddConversationListenerParams { conversation_id })
+        .await?;
+    let _add_listener_resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(add_listener_id)),
+    )
+    .await??;
+
+    // 3) sendUserMessage (should trigger notifications; we only validate an OK response)
+    let send_user_id = mcp
+        .send_send_user_message_request(SendUserMessageParams {
+            conversation_id,
+            items: vec![codex_protocol::mcp_protocol::InputItem::Text {
+                text: "run first sleep command".to_string(),
+            }],
+        })
+        .await?;
+    let send_user_resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(send_user_id)),
+    )
+    .await??;
+    let SendUserMessageResponse {} = to_response::<SendUserMessageResponse>(send_user_resp)?;
+
+    // Give the command a moment to start
+    tokio::time::sleep(std::time::Duration::from_secs(1)).await;
+
+    // 4) send interrupt request
+    let interrupt_id = mcp
+        .send_interrupt_conversation_request(InterruptConversationParams { conversation_id })
+        .await?;
+    let interrupt_resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(interrupt_id)),
+    )
+    .await??;
+    let InterruptConversationResponse { abort_reason } =
+        to_response::<InterruptConversationResponse>(interrupt_resp)?;
+    assert_eq!(TurnAbortReason::Interrupted, abort_reason);
+
+    Ok(())
+}
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+fn create_config_toml(codex_home: &Path, server_uri: String) -> std::io::Result<()> {
+    let config_toml = codex_home.join("config.toml");
+    std::fs::write(
+        config_toml,
+        format!(
+            r#"
+model = "mock-model"
+approval_policy = "never"
+sandbox_mode = "danger-full-access"
+
+model_provider = "mock_provider"
+
+[model_providers.mock_provider]
+name = "Mock provider for test"
+base_url = "{server_uri}/v1"
+wire_api = "chat"
+request_max_retries = 0
+stream_max_retries = 0
+"#
+        ),
+    )
+}
--- a/codex-rs/mcp-server/tests/suite/login.rs
+++ b/codex-rs/mcp-server/tests/suite/login.rs
@@ -0,0 +1,146 @@
+use std::path::Path;
+use std::time::Duration;
+
+use codex_login::login_with_api_key;
+use codex_protocol::mcp_protocol::CancelLoginChatGptParams;
+use codex_protocol::mcp_protocol::CancelLoginChatGptResponse;
+use codex_protocol::mcp_protocol::GetAuthStatusParams;
+use codex_protocol::mcp_protocol::GetAuthStatusResponse;
+use codex_protocol::mcp_protocol::LoginChatGptResponse;
+use codex_protocol::mcp_protocol::LogoutChatGptResponse;
+use mcp_test_support::McpProcess;
+use mcp_test_support::to_response;
+use mcp_types::JSONRPCResponse;
+use mcp_types::RequestId;
+use tempfile::TempDir;
+use tokio::time::timeout;
+
+const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);
+
+// Helper to create a config.toml; mirrors create_conversation.rs
+fn create_config_toml(codex_home: &Path) -> std::io::Result<()> {
+    let config_toml = codex_home.join("config.toml");
+    std::fs::write(
+        config_toml,
+        r#"
+model = "mock-model"
+approval_policy = "never"
+sandbox_mode = "danger-full-access"
+
+model_provider = "mock_provider"
+
+[model_providers.mock_provider]
+name = "Mock provider for test"
+base_url = "http://127.0.0.1:0/v1"
+wire_api = "chat"
+request_max_retries = 0
+stream_max_retries = 0
+"#,
+    )
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn logout_chatgpt_removes_auth() {
+    let codex_home = TempDir::new().unwrap_or_else(|e| panic!("create tempdir: {e}"));
+    create_config_toml(codex_home.path()).expect("write config.toml");
+    login_with_api_key(codex_home.path(), "sk-test-key").expect("seed api key");
+    assert!(codex_home.path().join("auth.json").exists());
+
+    let mut mcp = McpProcess::new(codex_home.path())
+        .await
+        .expect("spawn mcp process");
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
+        .await
+        .expect("init timeout")
+        .expect("init failed");
+
+    let id = mcp
+        .send_logout_chat_gpt_request()
+        .await
+        .expect("send logoutChatGpt");
+    let resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(id)),
+    )
+    .await
+    .expect("logoutChatGpt timeout")
+    .expect("logoutChatGpt response");
+    let _ok: LogoutChatGptResponse = to_response(resp).expect("deserialize logout response");
+
+    assert!(
+        !codex_home.path().join("auth.json").exists(),
+        "auth.json should be deleted"
+    );
+
+    // Verify status reflects signed-out state.
+    let status_id = mcp
+        .send_get_auth_status_request(GetAuthStatusParams {
+            include_token: Some(true),
+            refresh_token: Some(false),
+        })
+        .await
+        .expect("send getAuthStatus");
+    let status_resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(status_id)),
+    )
+    .await
+    .expect("getAuthStatus timeout")
+    .expect("getAuthStatus response");
+    let status: GetAuthStatusResponse = to_response(status_resp).expect("deserialize status");
+    assert_eq!(status.auth_method, None);
+    assert_eq!(status.auth_token, None);
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn login_and_cancel_chatgpt() {
+    let codex_home = TempDir::new().unwrap_or_else(|e| panic!("create tempdir: {e}"));
+    create_config_toml(codex_home.path()).expect("write config.toml");
+
+    let mut mcp = McpProcess::new(codex_home.path())
+        .await
+        .expect("spawn mcp process");
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
+        .await
+        .expect("init timeout")
+        .expect("init failed");
+
+    let login_id = mcp
+        .send_login_chat_gpt_request()
+        .await
+        .expect("send loginChatGpt");
+    let login_resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(login_id)),
+    )
+    .await
+    .expect("loginChatGpt timeout")
+    .expect("loginChatGpt response");
+    let login: LoginChatGptResponse = to_response(login_resp).expect("deserialize login resp");
+
+    let cancel_id = mcp
+        .send_cancel_login_chat_gpt_request(CancelLoginChatGptParams {
+            login_id: login.login_id,
+        })
+        .await
+        .expect("send cancelLoginChatGpt");
+    let cancel_resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(cancel_id)),
+    )
+    .await
+    .expect("cancelLoginChatGpt timeout")
+    .expect("cancelLoginChatGpt response");
+    let _ok: CancelLoginChatGptResponse =
+        to_response(cancel_resp).expect("deserialize cancel response");
+
+    // Optionally observe the completion notification; do not fail if it races.
+    let maybe_note = timeout(
+        Duration::from_secs(2),
+        mcp.read_stream_until_notification_message("codex/event/login_chat_gpt_complete"),
+    )
+    .await;
+    if maybe_note.is_err() {
+        eprintln!("warning: did not observe login_chat_gpt_complete notification after cancel");
+    }
+}
--- a/codex-rs/mcp-server/tests/suite/mod.rs
+++ b/codex-rs/mcp-server/tests/suite/mod.rs
@@ -0,0 +1,8 @@
+// Aggregates all former standalone integration tests as modules.
+mod auth;
+mod codex_message_processor_flow;
+mod codex_tool;
+mod create_conversation;
+mod interrupt;
+mod login;
+mod send_message;
--- a/codex-rs/mcp-server/tests/suite/send_message.rs
+++ b/codex-rs/mcp-server/tests/suite/send_message.rs
@@ -0,0 +1,186 @@
+use std::path::Path;
+
+use codex_protocol::mcp_protocol::AddConversationListenerParams;
+use codex_protocol::mcp_protocol::AddConversationSubscriptionResponse;
+use codex_protocol::mcp_protocol::ConversationId;
+use codex_protocol::mcp_protocol::InputItem;
+use codex_protocol::mcp_protocol::NewConversationParams;
+use codex_protocol::mcp_protocol::NewConversationResponse;
+use codex_protocol::mcp_protocol::SendUserMessageParams;
+use codex_protocol::mcp_protocol::SendUserMessageResponse;
+use mcp_test_support::McpProcess;
+use mcp_test_support::create_final_assistant_message_sse_response;
+use mcp_test_support::create_mock_chat_completions_server;
+use mcp_test_support::to_response;
+use mcp_types::JSONRPCNotification;
+use mcp_types::JSONRPCResponse;
+use mcp_types::RequestId;
+use pretty_assertions::assert_eq;
+use tempfile::TempDir;
+use tokio::time::timeout;
+
+const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);
+
+#[tokio::test]
+async fn test_send_message_success() {
+    // Spin up a mock completions server that immediately ends the Codex turn.
+    // Two Codex turns hit the mock model (session start + send-user-message). Provide two SSE responses.
+    let responses = vec![
+        create_final_assistant_message_sse_response("Done").expect("build mock assistant message"),
+        create_final_assistant_message_sse_response("Done").expect("build mock assistant message"),
+    ];
+    let server = create_mock_chat_completions_server(responses).await;
+
+    // Create a temporary Codex home with config pointing at the mock server.
+    let codex_home = TempDir::new().expect("create temp dir");
+    create_config_toml(codex_home.path(), &server.uri()).expect("write config.toml");
+
+    // Start MCP server process and initialize.
+    let mut mcp = McpProcess::new(codex_home.path())
+        .await
+        .expect("spawn mcp process");
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
+        .await
+        .expect("init timed out")
+        .expect("init failed");
+
+    // Start a conversation using the new wire API.
+    let new_conv_id = mcp
+        .send_new_conversation_request(NewConversationParams::default())
+        .await
+        .expect("send newConversation");
+    let new_conv_resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(new_conv_id)),
+    )
+    .await
+    .expect("newConversation timeout")
+    .expect("newConversation resp");
+    let NewConversationResponse {
+        conversation_id, ..
+    } = to_response::<_>(new_conv_resp).expect("deserialize newConversation response");
+
+    // 2) addConversationListener
+    let add_listener_id = mcp
+        .send_add_conversation_listener_request(AddConversationListenerParams { conversation_id })
+        .await
+        .expect("send addConversationListener");
+    let add_listener_resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(add_listener_id)),
+    )
+    .await
+    .expect("addConversationListener timeout")
+    .expect("addConversationListener resp");
+    let AddConversationSubscriptionResponse { subscription_id: _ } =
+        to_response::<_>(add_listener_resp).expect("deserialize addConversationListener response");
+
+    // Now exercise sendUserMessage twice.
+    send_message("Hello", conversation_id, &mut mcp).await;
+    send_message("Hello again", conversation_id, &mut mcp).await;
+}
+
+#[expect(clippy::expect_used)]
+async fn send_message(message: &str, conversation_id: ConversationId, mcp: &mut McpProcess) {
+    // Now exercise sendUserMessage.
+    let send_id = mcp
+        .send_send_user_message_request(SendUserMessageParams {
+            conversation_id,
+            items: vec![InputItem::Text {
+                text: message.to_string(),
+            }],
+        })
+        .await
+        .expect("send sendUserMessage");
+
+    let response: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(send_id)),
+    )
+    .await
+    .expect("sendUserMessage response timeout")
+    .expect("sendUserMessage response error");
+
+    let _ok: SendUserMessageResponse = to_response::<SendUserMessageResponse>(response)
+        .expect("deserialize sendUserMessage response");
+
+    // Verify the task_finished notification is received.
+    // Note this also ensures that the final request to the server was made.
+    let task_finished_notification: JSONRPCNotification = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_notification_message("codex/event/task_complete"),
+    )
+    .await
+    .expect("task_finished_notification timeout")
+    .expect("task_finished_notification resp");
+    let serde_json::Value::Object(map) = task_finished_notification
+        .params
+        .expect("notification should have params")
+    else {
+        panic!("task_finished_notification should have params");
+    };
+    assert_eq!(
+        map.get("conversationId")
+            .expect("should have conversationId"),
+        &serde_json::Value::String(conversation_id.to_string())
+    );
+}
+
+#[tokio::test]
+async fn test_send_message_session_not_found() {
+    // Start MCP without creating a Codex session
+    let codex_home = TempDir::new().expect("tempdir");
+    let mut mcp = McpProcess::new(codex_home.path()).await.expect("spawn");
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
+        .await
+        .expect("timeout")
+        .expect("init");
+
+    let unknown = ConversationId(uuid::Uuid::new_v4());
+    let req_id = mcp
+        .send_send_user_message_request(SendUserMessageParams {
+            conversation_id: unknown,
+            items: vec![InputItem::Text {
+                text: "ping".to_string(),
+            }],
+        })
+        .await
+        .expect("send sendUserMessage");
+
+    // Expect an error response for unknown conversation.
+    let err = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_error_message(RequestId::Integer(req_id)),
+    )
+    .await
+    .expect("timeout")
+    .expect("error");
+    assert_eq!(err.id, RequestId::Integer(req_id));
+}
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+fn create_config_toml(codex_home: &Path, server_uri: &str) -> std::io::Result<()> {
+    let config_toml = codex_home.join("config.toml");
+    std::fs::write(
+        config_toml,
+        format!(
+            r#"
+model = "mock-model"
+approval_policy = "never"
+sandbox_mode = "danger-full-access"
+
+model_provider = "mock_provider"
+
+[model_providers.mock_provider]
+name = "Mock provider for test"
+base_url = "{server_uri}/v1"
+wire_api = "chat"
+request_max_retries = 0
+stream_max_retries = 0
+"#
+        ),
+    )
+}