chore: Add shell serialization tests for json (#6043)
## Summary
You can never have enough tests on this code path: these tests check that
JSON inside a shell call is deserialized correctly.
## Tests
- [x] These are tests 😎
This commit is contained in:
@@ -30,6 +30,18 @@ use serde_json::Value;
|
|||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
use std::fs;
|
use std::fs;
|
||||||
|
|
||||||
|
const FIXTURE_JSON: &str = r#"{
|
||||||
|
"description": "This is an example JSON file.",
|
||||||
|
"foo": "bar",
|
||||||
|
"isTest": true,
|
||||||
|
"testNumber": 123,
|
||||||
|
"testArray": [1, 2, 3],
|
||||||
|
"testObject": {
|
||||||
|
"foo": "bar"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"#;
|
||||||
|
|
||||||
async fn submit_turn(test: &TestCodex, prompt: &str, sandbox_policy: SandboxPolicy) -> Result<()> {
|
async fn submit_turn(test: &TestCodex, prompt: &str, sandbox_policy: SandboxPolicy) -> Result<()> {
|
||||||
let session_model = test.session_configured.model.clone();
|
let session_model = test.session_configured.model.clone();
|
||||||
|
|
||||||
@@ -225,6 +237,154 @@ freeform shell
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||||
|
async fn shell_output_preserves_fixture_json_without_serialization() -> Result<()> {
|
||||||
|
skip_if_no_network!(Ok(()));
|
||||||
|
|
||||||
|
let server = start_mock_server().await;
|
||||||
|
let mut builder = test_codex().with_config(|config| {
|
||||||
|
config.features.disable(Feature::ApplyPatchFreeform);
|
||||||
|
config.model = "gpt-5".to_string();
|
||||||
|
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is a model family");
|
||||||
|
});
|
||||||
|
let test = builder.build(&server).await?;
|
||||||
|
|
||||||
|
let fixture_path = test.cwd.path().join("fixture.json");
|
||||||
|
fs::write(&fixture_path, FIXTURE_JSON)?;
|
||||||
|
let fixture_path_str = fixture_path.to_string_lossy().to_string();
|
||||||
|
|
||||||
|
let call_id = "shell-json-fixture";
|
||||||
|
let args = json!({
|
||||||
|
"command": ["/usr/bin/sed", "-n", "p", fixture_path_str],
|
||||||
|
"timeout_ms": 1_000,
|
||||||
|
});
|
||||||
|
let responses = vec![
|
||||||
|
sse(vec![
|
||||||
|
ev_response_created("resp-1"),
|
||||||
|
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||||
|
ev_completed("resp-1"),
|
||||||
|
]),
|
||||||
|
sse(vec![
|
||||||
|
ev_assistant_message("msg-1", "done"),
|
||||||
|
ev_completed("resp-2"),
|
||||||
|
]),
|
||||||
|
];
|
||||||
|
mount_sse_sequence(&server, responses).await;
|
||||||
|
|
||||||
|
submit_turn(
|
||||||
|
&test,
|
||||||
|
"read the fixture JSON with sed",
|
||||||
|
SandboxPolicy::DangerFullAccess,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
let requests = server
|
||||||
|
.received_requests()
|
||||||
|
.await
|
||||||
|
.expect("recorded requests present");
|
||||||
|
let bodies = request_bodies(&requests)?;
|
||||||
|
let output_item = find_function_call_output(&bodies, call_id).expect("shell output present");
|
||||||
|
let output = output_item
|
||||||
|
.get("output")
|
||||||
|
.and_then(Value::as_str)
|
||||||
|
.expect("shell output string");
|
||||||
|
|
||||||
|
let mut parsed: Value = serde_json::from_str(output)?;
|
||||||
|
if let Some(metadata) = parsed.get_mut("metadata").and_then(Value::as_object_mut) {
|
||||||
|
let _ = metadata.remove("duration_seconds");
|
||||||
|
}
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
parsed
|
||||||
|
.get("metadata")
|
||||||
|
.and_then(|metadata| metadata.get("exit_code"))
|
||||||
|
.and_then(Value::as_i64),
|
||||||
|
Some(0),
|
||||||
|
"expected zero exit code when serialization is disabled",
|
||||||
|
);
|
||||||
|
let stdout = parsed
|
||||||
|
.get("output")
|
||||||
|
.and_then(Value::as_str)
|
||||||
|
.unwrap_or_default()
|
||||||
|
.to_string();
|
||||||
|
assert_eq!(
|
||||||
|
stdout, FIXTURE_JSON,
|
||||||
|
"expected shell output to match the fixture contents"
|
||||||
|
);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||||
|
async fn shell_output_structures_fixture_with_serialization() -> Result<()> {
|
||||||
|
skip_if_no_network!(Ok(()));
|
||||||
|
|
||||||
|
let server = start_mock_server().await;
|
||||||
|
let mut builder = test_codex().with_config(|config| {
|
||||||
|
config.features.enable(Feature::ApplyPatchFreeform);
|
||||||
|
});
|
||||||
|
let test = builder.build(&server).await?;
|
||||||
|
|
||||||
|
let fixture_path = test.cwd.path().join("fixture.json");
|
||||||
|
fs::write(&fixture_path, FIXTURE_JSON)?;
|
||||||
|
let fixture_path_str = fixture_path.to_string_lossy().to_string();
|
||||||
|
|
||||||
|
let call_id = "shell-structured-fixture";
|
||||||
|
let args = json!({
|
||||||
|
"command": ["/usr/bin/sed", "-n", "p", fixture_path_str],
|
||||||
|
"timeout_ms": 1_000,
|
||||||
|
});
|
||||||
|
let responses = vec![
|
||||||
|
sse(vec![
|
||||||
|
ev_response_created("resp-1"),
|
||||||
|
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||||
|
ev_completed("resp-1"),
|
||||||
|
]),
|
||||||
|
sse(vec![
|
||||||
|
ev_assistant_message("msg-1", "done"),
|
||||||
|
ev_completed("resp-2"),
|
||||||
|
]),
|
||||||
|
];
|
||||||
|
mount_sse_sequence(&server, responses).await;
|
||||||
|
|
||||||
|
submit_turn(
|
||||||
|
&test,
|
||||||
|
"read the fixture JSON with structured output",
|
||||||
|
SandboxPolicy::DangerFullAccess,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
let requests = server
|
||||||
|
.received_requests()
|
||||||
|
.await
|
||||||
|
.expect("recorded requests present");
|
||||||
|
let bodies = request_bodies(&requests)?;
|
||||||
|
let output_item =
|
||||||
|
find_function_call_output(&bodies, call_id).expect("structured output present");
|
||||||
|
let output = output_item
|
||||||
|
.get("output")
|
||||||
|
.and_then(Value::as_str)
|
||||||
|
.expect("structured output string");
|
||||||
|
|
||||||
|
assert!(
|
||||||
|
serde_json::from_str::<Value>(output).is_err(),
|
||||||
|
"expected structured output to be plain text"
|
||||||
|
);
|
||||||
|
let (header, body) = output
|
||||||
|
.split_once("Output:\n")
|
||||||
|
.expect("structured output contains an Output section");
|
||||||
|
assert_regex_match(
|
||||||
|
r"(?s)^Exit code: 0\nWall time: [0-9]+(?:\.[0-9]+)? seconds$",
|
||||||
|
header.trim_end(),
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
body, FIXTURE_JSON,
|
||||||
|
"expected Output section to include the fixture contents"
|
||||||
|
);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||||
async fn shell_output_for_freeform_tool_records_duration() -> Result<()> {
|
async fn shell_output_for_freeform_tool_records_duration() -> Result<()> {
|
||||||
skip_if_no_network!(Ok(()));
|
skip_if_no_network!(Ok(()));
|
||||||
|
|||||||
Reference in New Issue
Block a user