From 1e9e703b969d3f0965b31d1cc3d70fed3ebdd6f6 Mon Sep 17 00:00:00 2001 From: Michael Bolin Date: Thu, 28 Aug 2025 12:33:33 -0700 Subject: [PATCH] chore: try to make it easier to debug the flakiness of test_shell_command_approval_triggers_elicitation (#2848) `test_shell_command_approval_triggers_elicitation()` is one of a number of integration tests that we have observed to be flaky on GitHub CI, so this PR tries to reduce the flakiness _and_ to provide us with more information when it flakes. Specifically: - Changed the command that we use to trigger the elicitation from `git init` to `python3 -c 'import pathlib; pathlib.Path(r"{}").touch()'` because running `git` seems more likely to invite variance. - Increased the timeout to wait for the task response from 10s to 20s. - Added more logging. --- .../mcp-server/tests/common/mcp_process.rs | 21 ++++++------ codex-rs/mcp-server/tests/suite/codex_tool.rs | 32 ++++++++++++------- 2 files changed, 33 insertions(+), 20 deletions(-) diff --git a/codex-rs/mcp-server/tests/common/mcp_process.rs b/codex-rs/mcp-server/tests/common/mcp_process.rs index 5788163c..eae83482 100644 --- a/codex-rs/mcp-server/tests/common/mcp_process.rs +++ b/codex-rs/mcp-server/tests/common/mcp_process.rs @@ -283,6 +283,7 @@ impl McpProcess { } async fn send_jsonrpc_message(&mut self, message: JSONRPCMessage) -> anyhow::Result<()> { + eprintln!("writing message to stdin: {message:?}"); let payload = serde_json::to_string(&message)?; self.stdin.write_all(payload.as_bytes()).await?; self.stdin.write_all(b"\n").await?; @@ -294,13 +295,15 @@ impl McpProcess { let mut line = String::new(); self.stdout.read_line(&mut line).await?; let message = serde_json::from_str::(&line)?; + eprintln!("read message from stdout: {message:?}"); Ok(message) } pub async fn read_stream_until_request_message(&mut self) -> anyhow::Result { + eprintln!("in read_stream_until_request_message()"); + loop { let message = self.read_jsonrpc_message().await?; - eprint!("message: {message:?}"); match message { JSONRPCMessage::Notification(_) => { @@ -323,10 +326,10 @@ impl McpProcess { &mut self, request_id: RequestId, ) -> anyhow::Result { + eprintln!("in read_stream_until_response_message({request_id:?})"); + loop { let message = self.read_jsonrpc_message().await?; - eprint!("message: {message:?}"); - match message { JSONRPCMessage::Notification(_) => { eprintln!("notification: {message:?}"); @@ -352,8 +355,6 @@ impl McpProcess { ) -> anyhow::Result { loop { let message = self.read_jsonrpc_message().await?; - eprint!("message: {message:?}"); - match message { JSONRPCMessage::Notification(_) => { eprintln!("notification: {message:?}"); @@ -377,10 +378,10 @@ impl McpProcess { &mut self, method: &str, ) -> anyhow::Result { + eprintln!("in read_stream_until_notification_message({method})"); + loop { let message = self.read_jsonrpc_message().await?; - eprint!("message: {message:?}"); - match message { JSONRPCMessage::Notification(notification) => { if notification.method == method { @@ -405,10 +406,10 @@ impl McpProcess { pub async fn read_stream_until_legacy_task_complete_notification( &mut self, ) -> anyhow::Result { + eprintln!("in read_stream_until_legacy_task_complete_notification()"); + loop { let message = self.read_jsonrpc_message().await?; - eprint!("message: {message:?}"); - match message { JSONRPCMessage::Notification(notification) => { let is_match = if notification.method == "codex/event" { @@ -427,6 +428,8 @@ impl McpProcess { if is_match { return Ok(notification); + } else { + eprintln!("ignoring notification: {notification:?}"); } } JSONRPCMessage::Request(_) => { diff --git a/codex-rs/mcp-server/tests/suite/codex_tool.rs b/codex-rs/mcp-server/tests/suite/codex_tool.rs index 13866d97..e7097b6b 100644 --- a/codex-rs/mcp-server/tests/suite/codex_tool.rs +++ b/codex-rs/mcp-server/tests/suite/codex_tool.rs @@ -30,7 +30,8 @@ use mcp_test_support::create_final_assistant_message_sse_response; use mcp_test_support::create_mock_chat_completions_server; use mcp_test_support::create_shell_sse_response; -const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10); +// Allow ample time on slower CI or under load to avoid flakes. +const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(20); /// Test that a shell command that is not on the "trusted" list triggers an /// elicitation request to the MCP and that sending the approval runs the @@ -52,9 +53,22 @@ async fn test_shell_command_approval_triggers_elicitation() { } async fn shell_command_approval_triggers_elicitation() -> anyhow::Result<()> { - // We use `git init` because it will not be on the "trusted" list. - let shell_command = vec!["git".to_string(), "init".to_string()]; + // Use a simple, untrusted command that creates a file so we can + // observe a side-effect. + // + // Cross‑platform approach: run a tiny Python snippet to touch the file + // using `python3 -c ...` on all platforms. let workdir_for_shell_function_call = TempDir::new()?; + let created_filename = "created_by_shell_tool.txt"; + let created_file = workdir_for_shell_function_call + .path() + .join(created_filename); + + let shell_command = vec![ + "python3".to_string(), + "-c".to_string(), + format!("import pathlib; pathlib.Path('{created_filename}').touch()"), + ]; let McpHandle { process: mut mcp_process, @@ -67,7 +81,7 @@ async fn shell_command_approval_triggers_elicitation() -> anyhow::Result<()> { Some(5_000), "call1234", )?, - create_final_assistant_message_sse_response("Enjoy your new git repo!")?, + create_final_assistant_message_sse_response("File created!")?, ]) .await?; @@ -122,8 +136,7 @@ async fn shell_command_approval_triggers_elicitation() -> anyhow::Result<()> { .expect("task_complete_notification timeout") .expect("task_complete_notification resp"); - // Verify the original `codex` tool call completes and that `git init` ran - // successfully. + // Verify the original `codex` tool call completes and that the file was created. let codex_response = timeout( DEFAULT_READ_TIMEOUT, mcp_process.read_stream_until_response_message(RequestId::Integer(codex_request_id)), @@ -136,7 +149,7 @@ async fn shell_command_approval_triggers_elicitation() -> anyhow::Result<()> { result: json!({ "content": [ { - "text": "Enjoy your new git repo!", + "text": "File created!", "type": "text" } ] @@ -145,10 +158,7 @@ async fn shell_command_approval_triggers_elicitation() -> anyhow::Result<()> { codex_response ); - assert!( - workdir_for_shell_function_call.path().join(".git").is_dir(), - ".git folder should have been created" - ); + assert!(created_file.is_file(), "created file should exist"); Ok(()) }