From 88e083a9d0473cfa5f03318433fbb52f88691be5 Mon Sep 17 00:00:00 2001
From: Dylan Hurd
Date: Fri, 31 Oct 2025 11:01:58 -0700
Subject: [PATCH] chore: Add shell serialization tests for json (#6043)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Summary
Can never have enough tests on this code path - checking that json inside a shell call is deserialized correctly.

## Tests
- [x] These are tests 😎
---
 .../core/tests/suite/shell_serialization.rs   | 164 ++++++++++++++++++
 1 file changed, 164 insertions(+)

diff --git a/codex-rs/core/tests/suite/shell_serialization.rs b/codex-rs/core/tests/suite/shell_serialization.rs
index 50d0c999..d748fe6b 100644
--- a/codex-rs/core/tests/suite/shell_serialization.rs
+++ b/codex-rs/core/tests/suite/shell_serialization.rs
@@ -30,6 +30,20 @@ use serde_json::Value;
 use serde_json::json;
 use std::fs;
 
+/// JSON fixture the tests below write to `fixture.json` in the test working directory
+/// and expect to read back unchanged through the shell tool.
+const FIXTURE_JSON: &str = r#"{
+    "description": "This is an example JSON file.",
+    "foo": "bar",
+    "isTest": true,
+    "testNumber": 123,
+    "testArray": [1, 2, 3],
+    "testObject": {
+        "foo": "bar"
+    }
+}
+"#;
+
 async fn submit_turn(test: &TestCodex, prompt: &str, sandbox_policy: SandboxPolicy) -> Result<()> {
     let session_model = test.session_configured.model.clone();
 
@@ -225,6 +239,156 @@ freeform shell
     Ok(())
 }
 
+/// Checks the `shell` tool output when `Feature::ApplyPatchFreeform` is disabled on `gpt-5`:
+/// the function-call output parses as JSON, `metadata.exit_code` is 0, and `output` holds
+/// the fixture file (read back with `sed -n p`) unchanged.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn shell_output_preserves_fixture_json_without_serialization() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let server = start_mock_server().await;
+    let mut builder = test_codex().with_config(|config| {
+        config.features.disable(Feature::ApplyPatchFreeform);
+        config.model = "gpt-5".to_string();
+        config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is a model family");
+    });
+    let test = builder.build(&server).await?;
+
+    let fixture_path = test.cwd.path().join("fixture.json");
+    fs::write(&fixture_path, FIXTURE_JSON)?;
+    let fixture_path_str = fixture_path.to_string_lossy().to_string();
+
+    let call_id = "shell-json-fixture";
+    let args = json!({
+        "command": ["/usr/bin/sed", "-n", "p", fixture_path_str],
+        "timeout_ms": 1_000,
+    });
+    let responses = vec![
+        sse(vec![
+            ev_response_created("resp-1"),
+            ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
+            ev_completed("resp-1"),
+        ]),
+        sse(vec![
+            ev_assistant_message("msg-1", "done"),
+            ev_completed("resp-2"),
+        ]),
+    ];
+    mount_sse_sequence(&server, responses).await;
+
+    submit_turn(
+        &test,
+        "read the fixture JSON with sed",
+        SandboxPolicy::DangerFullAccess,
+    )
+    .await?;
+
+    let requests = server
+        .received_requests()
+        .await
+        .expect("recorded requests present");
+    let bodies = request_bodies(&requests)?;
+    let output_item = find_function_call_output(&bodies, call_id).expect("shell output present");
+    let output = output_item
+        .get("output")
+        .and_then(Value::as_str)
+        .expect("shell output string");
+
+    let parsed: Value = serde_json::from_str(output)?;
+
+    assert_eq!(
+        parsed
+            .get("metadata")
+            .and_then(|metadata| metadata.get("exit_code"))
+            .and_then(Value::as_i64),
+        Some(0),
+        "expected zero exit code when serialization is disabled",
+    );
+    let stdout = parsed
+        .get("output")
+        .and_then(Value::as_str)
+        .unwrap_or_default();
+    assert_eq!(
+        stdout, FIXTURE_JSON,
+        "expected shell output to match the fixture contents"
+    );
+
+    Ok(())
+}
+
+/// Checks the `shell` tool output when `Feature::ApplyPatchFreeform` is enabled: the
+/// function-call output is plain text rather than JSON, with an `Exit code: 0` / `Wall time`
+/// header followed by an `Output:` section holding the fixture file unchanged.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn shell_output_structures_fixture_with_serialization() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let server = start_mock_server().await;
+    let mut builder = test_codex().with_config(|config| {
+        config.features.enable(Feature::ApplyPatchFreeform);
+    });
+    let test = builder.build(&server).await?;
+
+    let fixture_path = test.cwd.path().join("fixture.json");
+    fs::write(&fixture_path, FIXTURE_JSON)?;
+    let fixture_path_str = fixture_path.to_string_lossy().to_string();
+
+    let call_id = "shell-structured-fixture";
+    let args = json!({
+        "command": ["/usr/bin/sed", "-n", "p", fixture_path_str],
+        "timeout_ms": 1_000,
+    });
+    let responses = vec![
+        sse(vec![
+            ev_response_created("resp-1"),
+            ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
+            ev_completed("resp-1"),
+        ]),
+        sse(vec![
+            ev_assistant_message("msg-1", "done"),
+            ev_completed("resp-2"),
+        ]),
+    ];
+    mount_sse_sequence(&server, responses).await;
+
+    submit_turn(
+        &test,
+        "read the fixture JSON with structured output",
+        SandboxPolicy::DangerFullAccess,
+    )
+    .await?;
+
+    let requests = server
+        .received_requests()
+        .await
+        .expect("recorded requests present");
+    let bodies = request_bodies(&requests)?;
+    let output_item =
+        find_function_call_output(&bodies, call_id).expect("structured output present");
+    let output = output_item
+        .get("output")
+        .and_then(Value::as_str)
+        .expect("structured output string");
+
+    assert!(
+        serde_json::from_str::<Value>(output).is_err(),
+        "expected structured output to be plain text"
+    );
+    let (header, body) = output
+        .split_once("Output:\n")
+        .expect("structured output contains an Output section");
+    assert_regex_match(
+        r"(?s)^Exit code: 0\nWall time: [0-9]+(?:\.[0-9]+)? seconds$",
+        header.trim_end(),
+    );
+    assert_eq!(
+        body, FIXTURE_JSON,
+        "expected Output section to include the fixture contents"
+    );
+
+    Ok(())
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn shell_output_for_freeform_tool_records_duration() -> Result<()> {
     skip_if_no_network!(Ok(()));