From d7953aed747eacef07ee17a3790b3cc75e8c836c Mon Sep 17 00:00:00 2001 From: Eric Traut Date: Wed, 5 Nov 2025 15:12:25 -0600 Subject: [PATCH] Fixes intermittent test failures in CI (#6282) I'm seeing two tests fail intermittently in CI. This PR attempts to address (or at least mitigate) the flakiness. * summarize_context_three_requests_and_instructions - The test snapshots server.received_requests() immediately after observing TaskComplete. Because the OpenAI /v1/responses call is streamed, the HTTP request can still be draining when that event fires, so wiremock occasionally reports only two captured requests. Fix is to wait for async activity to complete. * archive_conversation_moves_rollout_into_archived_directory - times out on a slow CI run. Mitigation is to increase timeout value from 10s to 20s. --- .../tests/suite/archive_conversation.rs | 2 +- codex-rs/core/tests/suite/compact.rs | 21 ++++++++----------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/codex-rs/app-server/tests/suite/archive_conversation.rs b/codex-rs/app-server/tests/suite/archive_conversation.rs index 2d8f746a..b6e85936 100644 --- a/codex-rs/app-server/tests/suite/archive_conversation.rs +++ b/codex-rs/app-server/tests/suite/archive_conversation.rs @@ -12,7 +12,7 @@ use std::path::Path; use tempfile::TempDir; use tokio::time::timeout; -const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10); +const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(20); #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn archive_conversation_moves_rollout_into_archived_directory() -> Result<()> { diff --git a/codex-rs/core/tests/suite/compact.rs b/codex-rs/core/tests/suite/compact.rs index b634cf0e..72fe2184 100644 --- a/codex-rs/core/tests/suite/compact.rs +++ b/codex-rs/core/tests/suite/compact.rs @@ -108,19 +108,19 @@ async fn summarize_context_three_requests_and_instructions() { let body = 
std::str::from_utf8(&req.body).unwrap_or(""); body.contains("\"text\":\"hello world\"") && !body.contains(COMPACT_PROMPT_MARKER) }; - mount_sse_once_match(&server, first_matcher, sse1).await; + let first_request_mock = mount_sse_once_match(&server, first_matcher, sse1).await; let second_matcher = |req: &wiremock::Request| { let body = std::str::from_utf8(&req.body).unwrap_or(""); body.contains(COMPACT_PROMPT_MARKER) }; - mount_sse_once_match(&server, second_matcher, sse2).await; + let second_request_mock = mount_sse_once_match(&server, second_matcher, sse2).await; let third_matcher = |req: &wiremock::Request| { let body = std::str::from_utf8(&req.body).unwrap_or(""); body.contains(&format!("\"text\":\"{THIRD_USER_MSG}\"")) }; - mount_sse_once_match(&server, third_matcher, sse3).await; + let third_request_mock = mount_sse_once_match(&server, third_matcher, sse3).await; // Build config pointing to the mock server and spawn Codex. let model_provider = ModelProviderInfo { @@ -172,16 +172,13 @@ async fn summarize_context_three_requests_and_instructions() { wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; // Inspect the three captured requests. - let requests = server.received_requests().await.unwrap(); - assert_eq!(requests.len(), 3, "expected exactly three requests"); + let req1 = first_request_mock.single_request(); + let req2 = second_request_mock.single_request(); + let req3 = third_request_mock.single_request(); - let req1 = &requests[0]; - let req2 = &requests[1]; - let req3 = &requests[2]; - - let body1 = req1.body_json::<serde_json::Value>().unwrap(); - let body2 = req2.body_json::<serde_json::Value>().unwrap(); - let body3 = req3.body_json::<serde_json::Value>().unwrap(); + let body1 = req1.body_json(); + let body2 = req2.body_json(); + let body3 = req3.body_json(); // Manual compact should keep the baseline developer instructions. let instr1 = body1.get("instructions").and_then(|v| v.as_str()).unwrap();