From 4a5f05c1361ecdb91b488fbd01e582ca8d711d1a Mon Sep 17 00:00:00 2001
From: Jeremy Rose <172423086+nornagon-openai@users.noreply.github.com>
Date: Thu, 25 Sep 2025 13:11:14 -0700
Subject: [PATCH] make tests pass cleanly in sandbox (#4067)

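This makes the reqwest client built by `create_client` sandbox-friendly
(it calls `no_proxy()` when the Codex sandbox env var is set to
`seatbelt`), so that tests using it can run inside the sandbox. It also
replaces the `non_sandbox_test!` macro with two more specific macros,
`skip_if_sandbox!` and `skip_if_no_network!`, and uses them to skip the
tests that cannot run inside the sandbox or without network access.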
---
 codex-rs/core/src/default_client.rs           | 20 +++++--
 codex-rs/core/src/git_info.rs                 |  2 +
 codex-rs/core/src/unified_exec/mod.rs         | 10 ++++
 codex-rs/core/tests/common/lib.rs             | 54 +++++++++++++++----
 codex-rs/core/tests/suite/cli_stream.rs       | 10 ++--
 codex-rs/core/tests/suite/client.rs           | 14 ++---
 codex-rs/core/tests/suite/compact.rs          | 12 ++---
 .../core/tests/suite/fork_conversation.rs     |  3 ++
 codex-rs/core/tests/suite/json_result.rs      |  4 +-
 codex-rs/core/tests/suite/prompt_caching.rs   |  8 +++
 codex-rs/core/tests/suite/review.rs           | 14 ++---
 .../suite/stream_error_allows_next_turn.rs    |  4 +-
 .../core/tests/suite/stream_no_completed.rs   |  4 +-
 .../core/tests/suite/user_notification.rs     |  4 +-
 codex-rs/exec/tests/suite/apply_patch.rs      |  8 +--
 codex-rs/exec/tests/suite/sandbox.rs          |  2 +
 .../login/tests/suite/login_server_e2e.rs     |  8 +--
 codex-rs/mcp-server/tests/suite/codex_tool.rs |  4 +-
 codex-rs/mcp-server/tests/suite/interrupt.rs  |  4 +-
 19 files changed, 130 insertions(+), 59 deletions(-)
diff --git a/codex-rs/core/src/default_client.rs b/codex-rs/core/src/default_client.rs
index 36212f25..5ce08d9d 100644
--- a/codex-rs/core/src/default_client.rs
+++ b/codex-rs/core/src/default_client.rs
@@ -1,3 +1,4 @@
+use crate::spawn::CODEX_SANDBOX_ENV_VAR;
 use reqwest::header::HeaderValue;
 use std::sync::LazyLock;
 use std::sync::Mutex;
@@ -20,7 +21,6 @@ use std::sync::Mutex;
 pub static USER_AGENT_SUFFIX: LazyLock<Mutex<Option<String>>> = LazyLock::new(|| Mutex::new(None));
 
 pub const CODEX_INTERNAL_ORIGINATOR_OVERRIDE_ENV_VAR: &str = "CODEX_INTERNAL_ORIGINATOR_OVERRIDE";
-
 #[derive(Debug, Clone)]
 pub struct Originator {
     pub value: String,
@@ -112,17 +112,25 @@ pub fn create_client() -> reqwest::Client {
     headers.insert("originator", ORIGINATOR.header_value.clone());
     let ua = get_codex_user_agent();
 
-    reqwest::Client::builder()
+    let mut builder = reqwest::Client::builder()
         // Set UA via dedicated helper to avoid header validation pitfalls
         .user_agent(ua)
-        .default_headers(headers)
-        .build()
-        .unwrap_or_else(|_| reqwest::Client::new())
+        .default_headers(headers);
+    if is_sandboxed() {
+        builder = builder.no_proxy();
+    }
+
+    builder.build().unwrap_or_else(|_| reqwest::Client::new())
+}
+
+fn is_sandboxed() -> bool {
+    std::env::var(CODEX_SANDBOX_ENV_VAR).as_deref() == Ok("seatbelt")
 }
 
 #[cfg(test)]
 mod tests {
     use super::*;
+    use core_test_support::skip_if_no_network;
 
     #[test]
     fn test_get_codex_user_agent() {
@@ -132,6 +140,8 @@ mod tests {
 
     #[tokio::test]
     async fn test_create_client_sets_default_headers() {
+        skip_if_no_network!();
+
         use wiremock::Mock;
         use wiremock::MockServer;
         use wiremock::ResponseTemplate;
diff --git a/codex-rs/core/src/git_info.rs b/codex-rs/core/src/git_info.rs
index 5c16b72d..63ef82d7 100644
--- a/codex-rs/core/src/git_info.rs
+++ b/codex-rs/core/src/git_info.rs
@@ -589,6 +589,7 @@ pub async fn current_branch_name(cwd: &Path) -> Option<String> {
 mod tests {
     use super::*;
 
+    use core_test_support::skip_if_sandbox;
     use std::fs;
     use std::path::PathBuf;
     use tempfile::TempDir;
@@ -660,6 +661,7 @@ mod tests {
 
     #[tokio::test]
     async fn test_recent_commits_orders_and_limits() {
+        skip_if_sandbox!();
         use tokio::time::Duration;
         use tokio::time::sleep;
 
diff --git a/codex-rs/core/src/unified_exec/mod.rs b/codex-rs/core/src/unified_exec/mod.rs
index 8b81345d..48b82e3e 100644
--- a/codex-rs/core/src/unified_exec/mod.rs
+++ b/codex-rs/core/src/unified_exec/mod.rs
@@ -404,6 +404,8 @@ async fn create_unified_exec_session(
 #[cfg(test)]
 mod tests {
     use super::*;
+    #[cfg(unix)]
+    use core_test_support::skip_if_sandbox;
 
     #[test]
     fn push_chunk_trims_only_excess_bytes() {
@@ -425,6 +427,8 @@ mod tests {
     #[cfg(unix)]
     #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
     async fn unified_exec_persists_across_requests_jif() -> Result<(), UnifiedExecError> {
+        skip_if_sandbox!(Ok(()));
+
         let manager = UnifiedExecSessionManager::default();
 
         let open_shell = manager
@@ -462,6 +466,8 @@ mod tests {
     #[cfg(unix)]
     #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
     async fn multi_unified_exec_sessions() -> Result<(), UnifiedExecError> {
+        skip_if_sandbox!(Ok(()));
+
         let manager = UnifiedExecSessionManager::default();
 
         let shell_a = manager
@@ -508,6 +514,8 @@ mod tests {
     #[cfg(unix)]
     #[tokio::test]
     async fn unified_exec_timeouts() -> Result<(), UnifiedExecError> {
+        skip_if_sandbox!(Ok(()));
+
         let manager = UnifiedExecSessionManager::default();
 
         let open_shell = manager
@@ -601,6 +609,8 @@ mod tests {
     #[cfg(unix)]
     #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
     async fn reusing_completed_session_returns_unknown_session() -> Result<(), UnifiedExecError> {
+        skip_if_sandbox!(Ok(()));
+
         let manager = UnifiedExecSessionManager::default();
 
         let open_shell = manager
diff --git a/codex-rs/core/tests/common/lib.rs b/codex-rs/core/tests/common/lib.rs
index 0fdd6038..8b272681 100644
--- a/codex-rs/core/tests/common/lib.rs
+++ b/codex-rs/core/tests/common/lib.rs
@@ -128,20 +128,56 @@ where
     }
 }
 
+pub fn sandbox_env_var() -> &'static str {
+    codex_core::spawn::CODEX_SANDBOX_ENV_VAR
+}
+
+pub fn sandbox_network_env_var() -> &'static str {
+    codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR
+}
+
 #[macro_export]
-macro_rules! non_sandbox_test {
-    // For tests that return ()
+macro_rules! skip_if_sandbox {
     () => {{
-        if ::std::env::var("CODEX_SANDBOX_NETWORK_DISABLED").is_ok() {
-            println!("Skipping test because it cannot execute when network is disabled in a Codex sandbox.");
+        if ::std::env::var($crate::sandbox_env_var())
+            == ::core::result::Result::Ok("seatbelt".to_string())
+        {
+            eprintln!(
+                "{} is set to 'seatbelt', skipping test.",
+                $crate::sandbox_env_var()
+            );
             return;
         }
     }};
-    // For tests that return Result<(), _>
-    (result $(,)?) => {{
-        if ::std::env::var("CODEX_SANDBOX_NETWORK_DISABLED").is_ok() {
-            println!("Skipping test because it cannot execute when network is disabled in a Codex sandbox.");
-            return ::core::result::Result::Ok(());
+    ($return_value:expr $(,)?) => {{
+        if ::std::env::var($crate::sandbox_env_var())
+            == ::core::result::Result::Ok("seatbelt".to_string())
+        {
+            eprintln!(
+                "{} is set to 'seatbelt', skipping test.",
+                $crate::sandbox_env_var()
+            );
+            return $return_value;
+        }
+    }};
+}
+
+#[macro_export]
+macro_rules! skip_if_no_network {
+    () => {{
+        if ::std::env::var($crate::sandbox_network_env_var()).is_ok() {
+            println!(
+                "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
+            );
+            return;
+        }
+    }};
+    ($return_value:expr $(,)?) => {{
+        if ::std::env::var($crate::sandbox_network_env_var()).is_ok() {
+            println!(
+                "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
+            );
+            return $return_value;
         }
     }};
 }
diff --git a/codex-rs/core/tests/suite/cli_stream.rs b/codex-rs/core/tests/suite/cli_stream.rs
index 368c47dc..5b698934 100644
--- a/codex-rs/core/tests/suite/cli_stream.rs
+++ b/codex-rs/core/tests/suite/cli_stream.rs
@@ -1,7 +1,7 @@
 use assert_cmd::Command as AssertCommand;
 use codex_core::RolloutRecorder;
 use codex_core::protocol::GitInfo;
-use core_test_support::non_sandbox_test;
+use core_test_support::skip_if_no_network;
 use std::time::Duration;
 use std::time::Instant;
 use tempfile::TempDir;
@@ -21,7 +21,7 @@ use wiremock::matchers::path;
 /// 4. Ensures the response is received exactly once and contains "hi"
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn chat_mode_stream_cli() {
-    non_sandbox_test!();
+    skip_if_no_network!();
 
     let server = MockServer::start().await;
     let sse = concat!(
@@ -97,7 +97,7 @@ async fn chat_mode_stream_cli() {
 /// received by a mock OpenAI Responses endpoint.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn exec_cli_applies_experimental_instructions_file() {
-    non_sandbox_test!();
+    skip_if_no_network!();
 
     // Start mock server which will capture the request and return a minimal
     // SSE stream for a single turn.
@@ -185,7 +185,7 @@ async fn exec_cli_applies_experimental_instructions_file() {
 /// 4. Ensures the fixture content is correctly streamed through the CLI
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn responses_api_stream_cli() {
-    non_sandbox_test!();
+    skip_if_no_network!();
 
     let fixture =
         std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/cli_responses_fixture.sse");
@@ -217,7 +217,7 @@ async fn responses_api_stream_cli() {
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn integration_creates_and_checks_session_file() {
     // Honor sandbox network restrictions for CI parity with the other tests.
-    non_sandbox_test!();
+    skip_if_no_network!();
 
     // 1. Temp home so we read/write isolated session files.
     let home = TempDir::new().unwrap();
diff --git a/codex-rs/core/tests/suite/client.rs b/codex-rs/core/tests/suite/client.rs
index 867accd7..a9b0bb1b 100644
--- a/codex-rs/core/tests/suite/client.rs
+++ b/codex-rs/core/tests/suite/client.rs
@@ -21,8 +21,8 @@ use codex_protocol::models::ReasoningItemReasoningSummary;
 use codex_protocol::models::WebSearchAction;
 use core_test_support::load_default_config_for_test;
 use core_test_support::load_sse_fixture_with_id;
-use core_test_support::non_sandbox_test;
 use core_test_support::responses;
+use core_test_support::skip_if_no_network;
 use core_test_support::test_codex::test_codex;
 use core_test_support::wait_for_event;
 use futures::StreamExt;
@@ -127,7 +127,7 @@ fn write_auth_json(
 
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn resume_includes_initial_messages_and_sends_prior_items() {
-    non_sandbox_test!();
+    skip_if_no_network!();
 
     // Create a fake rollout session file with prior user + system + assistant messages.
     let tmpdir = TempDir::new().unwrap();
@@ -293,7 +293,7 @@ async fn resume_includes_initial_messages_and_sends_prior_items() {
 
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn includes_conversation_id_and_model_headers_in_request() {
-    non_sandbox_test!();
+    skip_if_no_network!();
 
     // Mock server
     let server = MockServer::start().await;
@@ -418,7 +418,7 @@ async fn includes_base_instructions_override_in_request() {
 
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn chatgpt_auth_sends_correct_request() {
-    non_sandbox_test!();
+    skip_if_no_network!();
 
     // Mock server
     let server = MockServer::start().await;
@@ -492,7 +492,7 @@ async fn chatgpt_auth_sends_correct_request() {
 
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn prefers_apikey_when_config_prefers_apikey_even_with_chatgpt_tokens() {
-    non_sandbox_test!();
+    skip_if_no_network!();
 
     // Mock server
     let server = MockServer::start().await;
@@ -619,7 +619,7 @@ async fn includes_user_instructions_message_in_request() {
 
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn azure_responses_request_includes_store_and_reasoning_ids() {
-    non_sandbox_test!();
+    skip_if_no_network!();
 
     let server = MockServer::start().await;
 
@@ -1141,7 +1141,7 @@ fn create_dummy_codex_auth() -> CodexAuth {
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn history_dedupes_streamed_and_final_messages_across_turns() {
     // Skip under Codex sandbox network restrictions (mirrors other tests).
-    non_sandbox_test!();
+    skip_if_no_network!();
 
     // Mock server that will receive three sequential requests and return the same SSE stream
     // each time: a few deltas, then a final assistant message, then completed.
diff --git a/codex-rs/core/tests/suite/compact.rs b/codex-rs/core/tests/suite/compact.rs
index 9e456e58..ede25588 100644
--- a/codex-rs/core/tests/suite/compact.rs
+++ b/codex-rs/core/tests/suite/compact.rs
@@ -10,6 +10,7 @@ use codex_core::protocol::Op;
 use codex_core::protocol::RolloutItem;
 use codex_core::protocol::RolloutLine;
 use core_test_support::load_default_config_for_test;
+use core_test_support::skip_if_no_network;
 use core_test_support::wait_for_event;
 use tempfile::TempDir;
 use wiremock::Mock;
@@ -20,7 +21,6 @@ use wiremock::matchers::method;
 use wiremock::matchers::path;
 
 use codex_core::codex::compact::SUMMARIZATION_PROMPT;
-use core_test_support::non_sandbox_test;
 use core_test_support::responses::ev_assistant_message;
 use core_test_support::responses::ev_completed;
 use core_test_support::responses::ev_completed_with_tokens;
@@ -53,7 +53,7 @@ const DUMMY_CALL_ID: &str = "call-multi-auto";
 
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn summarize_context_three_requests_and_instructions() {
-    non_sandbox_test!();
+    skip_if_no_network!();
 
     // Set up a mock server that we can inspect after the run.
     let server = start_mock_server().await;
@@ -270,7 +270,7 @@ async fn summarize_context_three_requests_and_instructions() {
 #[cfg_attr(windows, tokio::test(flavor = "multi_thread", worker_threads = 4))]
 #[cfg_attr(not(windows), tokio::test(flavor = "multi_thread", worker_threads = 2))]
 async fn auto_compact_runs_after_token_limit_hit() {
-    non_sandbox_test!();
+    skip_if_no_network!();
 
     let server = start_mock_server().await;
 
@@ -430,7 +430,7 @@ async fn auto_compact_runs_after_token_limit_hit() {
 
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn auto_compact_persists_rollout_entries() {
-    non_sandbox_test!();
+    skip_if_no_network!();
 
     let server = start_mock_server().await;
 
@@ -558,7 +558,7 @@ async fn auto_compact_persists_rollout_entries() {
 
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn auto_compact_stops_after_failed_attempt() {
-    non_sandbox_test!();
+    skip_if_no_network!();
 
     let server = start_mock_server().await;
 
@@ -679,7 +679,7 @@ async fn auto_compact_stops_after_failed_attempt() {
 
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_events() {
-    non_sandbox_test!();
+    skip_if_no_network!();
 
     let server = start_mock_server().await;
 
diff --git a/codex-rs/core/tests/suite/fork_conversation.rs b/codex-rs/core/tests/suite/fork_conversation.rs
index f3027811..44c0cd81 100644
--- a/codex-rs/core/tests/suite/fork_conversation.rs
+++ b/codex-rs/core/tests/suite/fork_conversation.rs
@@ -14,6 +14,7 @@ use codex_core::protocol::Op;
 use codex_core::protocol::RolloutItem;
 use codex_core::protocol::RolloutLine;
 use core_test_support::load_default_config_for_test;
+use core_test_support::skip_if_no_network;
 use core_test_support::wait_for_event;
 use tempfile::TempDir;
 use wiremock::Mock;
@@ -29,6 +30,8 @@ fn sse_completed(id: &str) -> String {
 
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn fork_conversation_twice_drops_to_first_message() {
+    skip_if_no_network!();
+
     // Start a mock server that completes three turns.
     let server = MockServer::start().await;
     let sse = sse_completed("resp");
diff --git a/codex-rs/core/tests/suite/json_result.rs b/codex-rs/core/tests/suite/json_result.rs
index 86da33f7..90d92232 100644
--- a/codex-rs/core/tests/suite/json_result.rs
+++ b/codex-rs/core/tests/suite/json_result.rs
@@ -6,8 +6,8 @@ use codex_core::protocol::InputItem;
 use codex_core::protocol::Op;
 use codex_core::protocol::SandboxPolicy;
 use codex_protocol::config_types::ReasoningSummary;
-use core_test_support::non_sandbox_test;
 use core_test_support::responses;
+use core_test_support::skip_if_no_network;
 use core_test_support::test_codex::TestCodex;
 use core_test_support::test_codex::test_codex;
 use core_test_support::wait_for_event;
@@ -40,7 +40,7 @@ async fn codex_returns_json_result_for_gpt5_codex() -> anyhow::Result<()> {
 }
 
 async fn codex_returns_json_result(model: String) -> anyhow::Result<()> {
-    non_sandbox_test!(result);
+    skip_if_no_network!(Ok(()));
 
     let server = start_mock_server().await;
 
diff --git a/codex-rs/core/tests/suite/prompt_caching.rs b/codex-rs/core/tests/suite/prompt_caching.rs
index 147cf27d..79be6083 100644
--- a/codex-rs/core/tests/suite/prompt_caching.rs
+++ b/codex-rs/core/tests/suite/prompt_caching.rs
@@ -16,6 +16,7 @@ use codex_core::shell::Shell;
 use codex_core::shell::default_user_shell;
 use core_test_support::load_default_config_for_test;
 use core_test_support::load_sse_fixture_with_id;
+use core_test_support::skip_if_no_network;
 use core_test_support::wait_for_event;
 use tempfile::TempDir;
 use wiremock::Mock;
@@ -67,6 +68,7 @@ fn assert_tool_names(body: &serde_json::Value, expected_names: &[&str]) {
 
 #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
 async fn codex_mini_latest_tools() {
+    skip_if_no_network!();
     use pretty_assertions::assert_eq;
 
     let server = MockServer::start().await;
@@ -151,6 +153,7 @@ async fn codex_mini_latest_tools() {
 
 #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
 async fn prompt_tools_are_consistent_across_requests() {
+    skip_if_no_network!();
     use pretty_assertions::assert_eq;
 
     let server = MockServer::start().await;
@@ -234,6 +237,7 @@ async fn prompt_tools_are_consistent_across_requests() {
 
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn prefixes_context_and_instructions_once_and_consistently_across_requests() {
+    skip_if_no_network!();
     use pretty_assertions::assert_eq;
 
     let server = MockServer::start().await;
@@ -352,6 +356,7 @@ async fn prefixes_context_and_instructions_once_and_consistently_across_requests
 
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn overrides_turn_context_but_keeps_cached_prefix_and_key_constant() {
+    skip_if_no_network!();
     use pretty_assertions::assert_eq;
 
     let server = MockServer::start().await;
@@ -479,6 +484,7 @@ async fn overrides_turn_context_but_keeps_cached_prefix_and_key_constant() {
 
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn per_turn_overrides_keep_cached_prefix_and_key_constant() {
+    skip_if_no_network!();
     use pretty_assertions::assert_eq;
 
     let server = MockServer::start().await;
@@ -602,6 +608,7 @@ async fn per_turn_overrides_keep_cached_prefix_and_key_constant() {
 
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn send_user_turn_with_no_changes_does_not_send_environment_context() {
+    skip_if_no_network!();
     use pretty_assertions::assert_eq;
 
     let server = MockServer::start().await;
@@ -715,6 +722,7 @@ async fn send_user_turn_with_no_changes_does_not_send_environment_context() {
 
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn send_user_turn_with_changes_sends_environment_context() {
+    skip_if_no_network!();
     use pretty_assertions::assert_eq;
 
     let server = MockServer::start().await;
diff --git a/codex-rs/core/tests/suite/review.rs b/codex-rs/core/tests/suite/review.rs
index d511946a..d54fc0b4 100644
--- a/codex-rs/core/tests/suite/review.rs
+++ b/codex-rs/core/tests/suite/review.rs
@@ -22,7 +22,7 @@ use codex_core::protocol::RolloutItem;
 use codex_core::protocol::RolloutLine;
 use core_test_support::load_default_config_for_test;
 use core_test_support::load_sse_fixture_with_id_from_str;
-use core_test_support::non_sandbox_test;
+use core_test_support::skip_if_no_network;
 use core_test_support::wait_for_event;
 use pretty_assertions::assert_eq;
 use std::path::PathBuf;
@@ -42,7 +42,7 @@ use wiremock::matchers::path;
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn review_op_emits_lifecycle_and_review_output() {
     // Skip under Codex sandbox network restrictions.
-    non_sandbox_test!();
+    skip_if_no_network!();
 
     // Start mock Responses API server. Return a single assistant message whose
     // text is a JSON-encoded ReviewOutputEvent.
@@ -167,7 +167,7 @@ async fn review_op_emits_lifecycle_and_review_output() {
 #[cfg_attr(windows, tokio::test(flavor = "multi_thread", worker_threads = 4))]
 #[cfg_attr(not(windows), tokio::test(flavor = "multi_thread", worker_threads = 2))]
 async fn review_op_with_plain_text_emits_review_fallback() {
-    non_sandbox_test!();
+    skip_if_no_network!();
 
     let sse_raw = r#"[
         {"type":"response.output_item.done", "item":{
@@ -216,7 +216,7 @@ async fn review_op_with_plain_text_emits_review_fallback() {
 #[cfg_attr(windows, tokio::test(flavor = "multi_thread", worker_threads = 4))]
 #[cfg_attr(not(windows), tokio::test(flavor = "multi_thread", worker_threads = 2))]
 async fn review_does_not_emit_agent_message_on_structured_output() {
-    non_sandbox_test!();
+    skip_if_no_network!();
 
     let review_json = serde_json::json!({
         "findings": [
@@ -288,7 +288,7 @@ async fn review_does_not_emit_agent_message_on_structured_output() {
 /// request uses that model (and not the main chat model).
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn review_uses_custom_review_model_from_config() {
-    non_sandbox_test!();
+    skip_if_no_network!();
 
     // Minimal stream: just a completed event
     let sse_raw = r#"[
@@ -341,7 +341,7 @@ async fn review_uses_custom_review_model_from_config() {
 #[cfg_attr(windows, tokio::test(flavor = "multi_thread", worker_threads = 4))]
 #[cfg_attr(not(windows), tokio::test(flavor = "multi_thread", worker_threads = 2))]
 async fn review_input_isolated_from_parent_history() {
-    non_sandbox_test!();
+    skip_if_no_network!();
 
     // Mock server for the single review request
     let sse_raw = r#"[
@@ -517,7 +517,7 @@ async fn review_input_isolated_from_parent_history() {
 /// messages in its request `input`.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn review_history_does_not_leak_into_parent_session() {
-    non_sandbox_test!();
+    skip_if_no_network!();
 
     // Respond to both the review request and the subsequent parent request.
     let sse_raw = r#"[
diff --git a/codex-rs/core/tests/suite/stream_error_allows_next_turn.rs b/codex-rs/core/tests/suite/stream_error_allows_next_turn.rs
index 14bff334..9c32c351 100644
--- a/codex-rs/core/tests/suite/stream_error_allows_next_turn.rs
+++ b/codex-rs/core/tests/suite/stream_error_allows_next_turn.rs
@@ -6,7 +6,7 @@ use codex_core::protocol::EventMsg;
 use codex_core::protocol::InputItem;
 use codex_core::protocol::Op;
 use core_test_support::load_sse_fixture_with_id;
-use core_test_support::non_sandbox_test;
+use core_test_support::skip_if_no_network;
 use core_test_support::test_codex::TestCodex;
 use core_test_support::test_codex::test_codex;
 use core_test_support::wait_for_event_with_timeout;
@@ -23,7 +23,7 @@ fn sse_completed(id: &str) -> String {
 
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn continue_after_stream_error() {
-    non_sandbox_test!();
+    skip_if_no_network!();
 
     let server = MockServer::start().await;
 
diff --git a/codex-rs/core/tests/suite/stream_no_completed.rs b/codex-rs/core/tests/suite/stream_no_completed.rs
index db55c7a4..ce33d807 100644
--- a/codex-rs/core/tests/suite/stream_no_completed.rs
+++ b/codex-rs/core/tests/suite/stream_no_completed.rs
@@ -10,7 +10,7 @@ use codex_core::protocol::InputItem;
 use codex_core::protocol::Op;
 use core_test_support::load_sse_fixture;
 use core_test_support::load_sse_fixture_with_id;
-use core_test_support::non_sandbox_test;
+use core_test_support::skip_if_no_network;
 use core_test_support::test_codex::TestCodex;
 use core_test_support::test_codex::test_codex;
 use tokio::time::timeout;
@@ -32,7 +32,7 @@ fn sse_completed(id: &str) -> String {
 
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn retries_on_early_close() {
-    non_sandbox_test!();
+    skip_if_no_network!();
 
     let server = MockServer::start().await;
 
diff --git a/codex-rs/core/tests/suite/user_notification.rs b/codex-rs/core/tests/suite/user_notification.rs
index c7acf35e..c1622394 100644
--- a/codex-rs/core/tests/suite/user_notification.rs
+++ b/codex-rs/core/tests/suite/user_notification.rs
@@ -5,8 +5,8 @@ use std::os::unix::fs::PermissionsExt;
 use codex_core::protocol::EventMsg;
 use codex_core::protocol::InputItem;
 use codex_core::protocol::Op;
-use core_test_support::non_sandbox_test;
 use core_test_support::responses;
+use core_test_support::skip_if_no_network;
 use core_test_support::test_codex::TestCodex;
 use core_test_support::test_codex::test_codex;
 use core_test_support::wait_for_event;
@@ -22,7 +22,7 @@ use tokio::time::sleep;
 
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn summarize_context_three_requests_and_instructions() -> anyhow::Result<()> {
-    non_sandbox_test!(result);
+    skip_if_no_network!(Ok(()));
 
     let server = start_mock_server().await;
 
diff --git a/codex-rs/exec/tests/suite/apply_patch.rs b/codex-rs/exec/tests/suite/apply_patch.rs
index 9db1f1cf..da19710d 100644
--- a/codex-rs/exec/tests/suite/apply_patch.rs
+++ b/codex-rs/exec/tests/suite/apply_patch.rs
@@ -48,9 +48,9 @@ fn test_standalone_exec_cli_can_use_apply_patch() -> anyhow::Result<()> {
 #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
 async fn test_apply_patch_tool() -> anyhow::Result<()> {
     use crate::suite::common::run_e2e_exec_test;
-    use core_test_support::non_sandbox_test;
+    use core_test_support::skip_if_no_network;
 
-    non_sandbox_test!(result);
+    skip_if_no_network!(Ok(()));
 
     let tmp_cwd = tempdir().expect("failed to create temp dir");
     let tmp_path = tmp_cwd.path().to_path_buf();
@@ -88,9 +88,9 @@ async fn test_apply_patch_tool() -> anyhow::Result<()> {
 #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
 async fn test_apply_patch_freeform_tool() -> anyhow::Result<()> {
     use crate::suite::common::run_e2e_exec_test;
-    use core_test_support::non_sandbox_test;
+    use core_test_support::skip_if_no_network;
 
-    non_sandbox_test!(result);
+    skip_if_no_network!(Ok(()));
 
     let tmp_cwd = tempdir().expect("failed to create temp dir");
     let freeform_add_patch = r#"*** Begin Patch
diff --git a/codex-rs/exec/tests/suite/sandbox.rs b/codex-rs/exec/tests/suite/sandbox.rs
index 5355a0b2..73a7f0d5 100644
--- a/codex-rs/exec/tests/suite/sandbox.rs
+++ b/codex-rs/exec/tests/suite/sandbox.rs
@@ -56,6 +56,7 @@ async fn spawn_command_under_sandbox(
 
 #[tokio::test]
 async fn python_multiprocessing_lock_works_under_sandbox() {
+    core_test_support::skip_if_sandbox!();
     #[cfg(target_os = "macos")]
     let writable_roots = Vec::<PathBuf>::new();
 
@@ -110,6 +111,7 @@ if __name__ == '__main__':
 
 #[tokio::test]
 async fn sandbox_distinguishes_command_and_policy_cwds() {
+    core_test_support::skip_if_sandbox!();
     let temp = tempfile::tempdir().expect("should be able to create temp dir");
     let sandbox_root = temp.path().join("sandbox");
     let command_root = temp.path().join("command");
diff --git a/codex-rs/login/tests/suite/login_server_e2e.rs b/codex-rs/login/tests/suite/login_server_e2e.rs
index 6d8dbd6d..a6e0de26 100644
--- a/codex-rs/login/tests/suite/login_server_e2e.rs
+++ b/codex-rs/login/tests/suite/login_server_e2e.rs
@@ -9,7 +9,7 @@ use anyhow::Result;
 use base64::Engine;
 use codex_login::ServerOptions;
 use codex_login::run_login_server;
-use core_test_support::non_sandbox_test;
+use core_test_support::skip_if_no_network;
 use tempfile::tempdir;
 
 // See spawn.rs for details
@@ -78,7 +78,7 @@ fn start_mock_issuer() -> (SocketAddr, thread::JoinHandle<()>) {
 
 #[tokio::test]
 async fn end_to_end_login_flow_persists_auth_json() -> Result<()> {
-    non_sandbox_test!(result);
+    skip_if_no_network!(Ok(()));
 
     let (issuer_addr, issuer_handle) = start_mock_issuer();
     let issuer = format!("http://{}:{}", issuer_addr.ip(), issuer_addr.port());
@@ -147,7 +147,7 @@ async fn end_to_end_login_flow_persists_auth_json() -> Result<()> {
 
 #[tokio::test]
 async fn creates_missing_codex_home_dir() -> Result<()> {
-    non_sandbox_test!(result);
+    skip_if_no_network!(Ok(()));
 
     let (issuer_addr, _issuer_handle) = start_mock_issuer();
     let issuer = format!("http://{}:{}", issuer_addr.ip(), issuer_addr.port());
@@ -187,7 +187,7 @@ async fn creates_missing_codex_home_dir() -> Result<()> {
 
 #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
 async fn cancels_previous_login_server_when_port_is_in_use() -> Result<()> {
-    non_sandbox_test!(result);
+    skip_if_no_network!(Ok(()));
 
     let (issuer_addr, _issuer_handle) = start_mock_issuer();
     let issuer = format!("http://{}:{}", issuer_addr.ip(), issuer_addr.port());
diff --git a/codex-rs/mcp-server/tests/suite/codex_tool.rs b/codex-rs/mcp-server/tests/suite/codex_tool.rs
index a111539d..13ce3864 100644
--- a/codex-rs/mcp-server/tests/suite/codex_tool.rs
+++ b/codex-rs/mcp-server/tests/suite/codex_tool.rs
@@ -24,7 +24,7 @@ use tempfile::TempDir;
 use tokio::time::timeout;
 use wiremock::MockServer;
 
-use core_test_support::non_sandbox_test;
+use core_test_support::skip_if_no_network;
 use mcp_test_support::McpProcess;
 use mcp_test_support::create_apply_patch_sse_response;
 use mcp_test_support::create_final_assistant_message_sse_response;
@@ -308,7 +308,7 @@ async fn patch_approval_triggers_elicitation() -> anyhow::Result<()> {
 
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn test_codex_tool_passes_base_instructions() {
-    non_sandbox_test!();
+    skip_if_no_network!();
 
     // Apparently `#[tokio::test]` must return `()`, so we create a helper
     // function that returns `Result` so we can use `?` in favor of `unwrap`.
diff --git a/codex-rs/mcp-server/tests/suite/interrupt.rs b/codex-rs/mcp-server/tests/suite/interrupt.rs
index e4daeae0..232b695d 100644
--- a/codex-rs/mcp-server/tests/suite/interrupt.rs
+++ b/codex-rs/mcp-server/tests/suite/interrupt.rs
@@ -11,7 +11,7 @@ use codex_protocol::mcp_protocol::NewConversationParams;
 use codex_protocol::mcp_protocol::NewConversationResponse;
 use codex_protocol::mcp_protocol::SendUserMessageParams;
 use codex_protocol::mcp_protocol::SendUserMessageResponse;
-use core_test_support::non_sandbox_test;
+use core_test_support::skip_if_no_network;
 use mcp_types::JSONRPCResponse;
 use mcp_types::RequestId;
 use tempfile::TempDir;
@@ -26,7 +26,7 @@ const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs
 
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn test_shell_command_interruption() {
-    non_sandbox_test!();
+    skip_if_no_network!();
 
     if let Err(err) = shell_command_interruption().await {
         panic!("failure: {err}");