chore: try to make it easier to debug the flakiness of test_shell_command_approval_triggers_elicitation (#2848)
`test_shell_command_approval_triggers_elicitation()` is one of a number
of integration tests that we have observed to be flaky on GitHub CI, so
this PR tries to reduce the flakiness _and_ to provide us with more
information when it flakes. Specifically:
- Changed the command that we use to trigger the elicitation from `git
init` to `python3 -c 'import pathlib; pathlib.Path(r"{}").touch()'`
because running `git` seems more likely to invite variance.
- Increased the timeout to wait for the task response from 10s to 20s.
- Added more logging.
This commit is contained in:
@@ -283,6 +283,7 @@ impl McpProcess {
|
||||
}
|
||||
|
||||
async fn send_jsonrpc_message(&mut self, message: JSONRPCMessage) -> anyhow::Result<()> {
|
||||
eprintln!("writing message to stdin: {message:?}");
|
||||
let payload = serde_json::to_string(&message)?;
|
||||
self.stdin.write_all(payload.as_bytes()).await?;
|
||||
self.stdin.write_all(b"\n").await?;
|
||||
@@ -294,13 +295,15 @@ impl McpProcess {
|
||||
let mut line = String::new();
|
||||
self.stdout.read_line(&mut line).await?;
|
||||
let message = serde_json::from_str::<JSONRPCMessage>(&line)?;
|
||||
eprintln!("read message from stdout: {message:?}");
|
||||
Ok(message)
|
||||
}
|
||||
|
||||
pub async fn read_stream_until_request_message(&mut self) -> anyhow::Result<JSONRPCRequest> {
|
||||
eprintln!("in read_stream_until_request_message()");
|
||||
|
||||
loop {
|
||||
let message = self.read_jsonrpc_message().await?;
|
||||
eprint!("message: {message:?}");
|
||||
|
||||
match message {
|
||||
JSONRPCMessage::Notification(_) => {
|
||||
@@ -323,10 +326,10 @@ impl McpProcess {
|
||||
&mut self,
|
||||
request_id: RequestId,
|
||||
) -> anyhow::Result<JSONRPCResponse> {
|
||||
eprintln!("in read_stream_until_response_message({request_id:?})");
|
||||
|
||||
loop {
|
||||
let message = self.read_jsonrpc_message().await?;
|
||||
eprint!("message: {message:?}");
|
||||
|
||||
match message {
|
||||
JSONRPCMessage::Notification(_) => {
|
||||
eprintln!("notification: {message:?}");
|
||||
@@ -352,8 +355,6 @@ impl McpProcess {
|
||||
) -> anyhow::Result<mcp_types::JSONRPCError> {
|
||||
loop {
|
||||
let message = self.read_jsonrpc_message().await?;
|
||||
eprint!("message: {message:?}");
|
||||
|
||||
match message {
|
||||
JSONRPCMessage::Notification(_) => {
|
||||
eprintln!("notification: {message:?}");
|
||||
@@ -377,10 +378,10 @@ impl McpProcess {
|
||||
&mut self,
|
||||
method: &str,
|
||||
) -> anyhow::Result<JSONRPCNotification> {
|
||||
eprintln!("in read_stream_until_notification_message({method})");
|
||||
|
||||
loop {
|
||||
let message = self.read_jsonrpc_message().await?;
|
||||
eprint!("message: {message:?}");
|
||||
|
||||
match message {
|
||||
JSONRPCMessage::Notification(notification) => {
|
||||
if notification.method == method {
|
||||
@@ -405,10 +406,10 @@ impl McpProcess {
|
||||
pub async fn read_stream_until_legacy_task_complete_notification(
|
||||
&mut self,
|
||||
) -> anyhow::Result<JSONRPCNotification> {
|
||||
eprintln!("in read_stream_until_legacy_task_complete_notification()");
|
||||
|
||||
loop {
|
||||
let message = self.read_jsonrpc_message().await?;
|
||||
eprint!("message: {message:?}");
|
||||
|
||||
match message {
|
||||
JSONRPCMessage::Notification(notification) => {
|
||||
let is_match = if notification.method == "codex/event" {
|
||||
@@ -427,6 +428,8 @@ impl McpProcess {
|
||||
|
||||
if is_match {
|
||||
return Ok(notification);
|
||||
} else {
|
||||
eprintln!("ignoring notification: {notification:?}");
|
||||
}
|
||||
}
|
||||
JSONRPCMessage::Request(_) => {
|
||||
|
||||
@@ -30,7 +30,8 @@ use mcp_test_support::create_final_assistant_message_sse_response;
|
||||
use mcp_test_support::create_mock_chat_completions_server;
|
||||
use mcp_test_support::create_shell_sse_response;
|
||||
|
||||
const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);
|
||||
// Allow ample time on slower CI or under load to avoid flakes.
|
||||
const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(20);
|
||||
|
||||
/// Test that a shell command that is not on the "trusted" list triggers an
|
||||
/// elicitation request to the MCP and that sending the approval runs the
|
||||
@@ -52,9 +53,22 @@ async fn test_shell_command_approval_triggers_elicitation() {
|
||||
}
|
||||
|
||||
async fn shell_command_approval_triggers_elicitation() -> anyhow::Result<()> {
|
||||
// We use `git init` because it will not be on the "trusted" list.
|
||||
let shell_command = vec!["git".to_string(), "init".to_string()];
|
||||
// Use a simple, untrusted command that creates a file so we can
|
||||
// observe a side-effect.
|
||||
//
|
||||
// Cross‑platform approach: run a tiny Python snippet to touch the file
|
||||
// using `python3 -c ...` on all platforms.
|
||||
let workdir_for_shell_function_call = TempDir::new()?;
|
||||
let created_filename = "created_by_shell_tool.txt";
|
||||
let created_file = workdir_for_shell_function_call
|
||||
.path()
|
||||
.join(created_filename);
|
||||
|
||||
let shell_command = vec![
|
||||
"python3".to_string(),
|
||||
"-c".to_string(),
|
||||
format!("import pathlib; pathlib.Path('{created_filename}').touch()"),
|
||||
];
|
||||
|
||||
let McpHandle {
|
||||
process: mut mcp_process,
|
||||
@@ -67,7 +81,7 @@ async fn shell_command_approval_triggers_elicitation() -> anyhow::Result<()> {
|
||||
Some(5_000),
|
||||
"call1234",
|
||||
)?,
|
||||
create_final_assistant_message_sse_response("Enjoy your new git repo!")?,
|
||||
create_final_assistant_message_sse_response("File created!")?,
|
||||
])
|
||||
.await?;
|
||||
|
||||
@@ -122,8 +136,7 @@ async fn shell_command_approval_triggers_elicitation() -> anyhow::Result<()> {
|
||||
.expect("task_complete_notification timeout")
|
||||
.expect("task_complete_notification resp");
|
||||
|
||||
// Verify the original `codex` tool call completes and that `git init` ran
|
||||
// successfully.
|
||||
// Verify the original `codex` tool call completes and that the file was created.
|
||||
let codex_response = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp_process.read_stream_until_response_message(RequestId::Integer(codex_request_id)),
|
||||
@@ -136,7 +149,7 @@ async fn shell_command_approval_triggers_elicitation() -> anyhow::Result<()> {
|
||||
result: json!({
|
||||
"content": [
|
||||
{
|
||||
"text": "Enjoy your new git repo!",
|
||||
"text": "File created!",
|
||||
"type": "text"
|
||||
}
|
||||
]
|
||||
@@ -145,10 +158,7 @@ async fn shell_command_approval_triggers_elicitation() -> anyhow::Result<()> {
|
||||
codex_response
|
||||
);
|
||||
|
||||
assert!(
|
||||
workdir_for_shell_function_call.path().join(".git").is_dir(),
|
||||
".git folder should have been created"
|
||||
);
|
||||
assert!(created_file.is_file(), "created file should exist");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user