fix: try to fix flakiness in test_shell_command_approval_triggers_elicitation (#2344)
I still see flakiness in `test_shell_command_approval_triggers_elicitation()` on occasion where `MockServer` claims it has not received all of its expected requests. I recently introduced a similar type of test in #2264, `test_codex_jsonrpc_conversation_flow()`, which I have not seen flake (yet!), so this PR pulls over two things I did in that test: - increased `worker_threads` from `2` to `4` - added an assertion to make sure the `task_complete` notification is received Honestly, I'm still not sure why `MockServer` claims it sometimes does not receive all its expected requests given that we assert that the final `JSONRPCResponse` is read on the stream, but let's give this a shot. Assuming this fixes things, my hypothesis is that the increase in `worker_threads` helps because perhaps there are async tasks in `MockServer` that do not reliably complete fully when there are not enough threads available? If that is correct, it seems like the test would still be flaky, though perhaps with lower frequency?
This commit is contained in:
@@ -35,7 +35,7 @@ const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs
|
||||
/// Test that a shell command that is not on the "trusted" list triggers an
|
||||
/// elicitation request to the MCP and that sending the approval runs the
|
||||
/// command, as expected.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
|
||||
async fn test_shell_command_approval_triggers_elicitation() {
|
||||
if env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
|
||||
println!(
|
||||
@@ -114,6 +114,16 @@ async fn shell_command_approval_triggers_elicitation() -> anyhow::Result<()> {
|
||||
)
|
||||
.await?;
|
||||
|
||||
// Verify task_complete notification arrives before the tool call completes.
|
||||
#[expect(clippy::expect_used)]
|
||||
let _task_complete = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp_process.read_stream_until_legacy_task_complete_notification(),
|
||||
)
|
||||
.await
|
||||
.expect("task_complete_notification timeout")
|
||||
.expect("task_complete_notification resp");
|
||||
|
||||
// Verify the original `codex` tool call completes and that `git init` ran
|
||||
// successfully.
|
||||
let codex_response = timeout(
|
||||
|
||||
@@ -474,4 +474,46 @@ impl McpProcess {
|
||||
}))
|
||||
.await
|
||||
}
|
||||
|
||||
/// Reads notifications until a legacy TaskComplete event is observed:
|
||||
/// Method "codex/event" with params.msg.type == "task_complete".
|
||||
pub async fn read_stream_until_legacy_task_complete_notification(
|
||||
&mut self,
|
||||
) -> anyhow::Result<JSONRPCNotification> {
|
||||
loop {
|
||||
let message = self.read_jsonrpc_message().await?;
|
||||
eprint!("message: {message:?}");
|
||||
|
||||
match message {
|
||||
JSONRPCMessage::Notification(notification) => {
|
||||
let is_match = if notification.method == "codex/event" {
|
||||
if let Some(params) = ¬ification.params {
|
||||
params
|
||||
.get("msg")
|
||||
.and_then(|m| m.get("type"))
|
||||
.and_then(|t| t.as_str())
|
||||
== Some("task_complete")
|
||||
} else {
|
||||
false
|
||||
}
|
||||
} else {
|
||||
false
|
||||
};
|
||||
|
||||
if is_match {
|
||||
return Ok(notification);
|
||||
}
|
||||
}
|
||||
JSONRPCMessage::Request(_) => {
|
||||
anyhow::bail!("unexpected JSONRPCMessage::Request: {message:?}");
|
||||
}
|
||||
JSONRPCMessage::Error(_) => {
|
||||
anyhow::bail!("unexpected JSONRPCMessage::Error: {message:?}");
|
||||
}
|
||||
JSONRPCMessage::Response(_) => {
|
||||
anyhow::bail!("unexpected JSONRPCMessage::Response: {message:?}");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user