Fix flaky windows test (#3564)

There are exactly 4 types of flaky tests in Windows x86 right now: 1. `review_input_isolated_from_parent_history` => Times out waiting for closing events 2. `review_does_not_emit_agent_message_on_structured_output` => Times out waiting for closing events 3. `auto_compact_runs_after_token_limit_hit` => Times out waiting for closing events 4. `auto_compact_runs_after_token_limit_hit` => Also has a problem where auto compact should add a third request, but receives 4 requests. 1, 2, and 3 seem to be solved with increasing threads on windows runner from 2 -> 4. Don't know yet why # 4 is happening, but probably also because of WireMock issues on windows causing races.
2025-09-14 16:20:25 -07:00
parent 51f88fd04a
commit b2f6fc3b9a
2 changed files with 41 additions and 6 deletions
--- a/codex-rs/core/tests/suite/review.rs
+++ b/codex-rs/core/tests/suite/review.rs
@@ -118,7 +118,9 @@ async fn review_op_emits_lifecycle_and_review_output() {
 /// When the model returns plain text that is not JSON, ensure the child
 /// lifecycle still occurs and the plain text is surfaced via
 /// ExitedReviewMode(Some(..)) as the overall_explanation.
-#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+// Windows CI only: bump to 4 workers to prevent SSE/event starvation and test timeouts.
+#[cfg_attr(windows, tokio::test(flavor = "multi_thread", worker_threads = 4))]
+#[cfg_attr(not(windows), tokio::test(flavor = "multi_thread", worker_threads = 2))]
 async fn review_op_with_plain_text_emits_review_fallback() {
    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
        println!(
@@ -168,7 +170,9 @@ async fn review_op_with_plain_text_emits_review_fallback() {

 /// When the model returns structured JSON in a review, ensure no AgentMessage
 /// is emitted; the UI consumes the structured result via ExitedReviewMode.
-#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+// Windows CI only: bump to 4 workers to prevent SSE/event starvation and test timeouts.
+#[cfg_attr(windows, tokio::test(flavor = "multi_thread", worker_threads = 4))]
+#[cfg_attr(not(windows), tokio::test(flavor = "multi_thread", worker_threads = 2))]
 async fn review_does_not_emit_agent_message_on_structured_output() {
    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
        println!(
@@ -293,7 +297,9 @@ async fn review_uses_custom_review_model_from_config() {
 /// When a review session begins, it must not prepend prior chat history from
 /// the parent session. The request `input` should contain only the review
 /// prompt from the user.
-#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+// Windows CI only: bump to 4 workers to prevent SSE/event starvation and test timeouts.
+#[cfg_attr(windows, tokio::test(flavor = "multi_thread", worker_threads = 4))]
+#[cfg_attr(not(windows), tokio::test(flavor = "multi_thread", worker_threads = 2))]
 async fn review_input_isolated_from_parent_history() {
    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
        println!(