Include output truncation message in tool call results (#2183)
To avoid model being confused about incomplete output.
This commit is contained in:
@@ -1,10 +1,11 @@
|
||||
#![cfg(target_os = "macos")]
|
||||
#![expect(clippy::expect_used)]
|
||||
#![expect(clippy::unwrap_used, clippy::expect_used)]
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use codex_core::exec::ExecParams;
|
||||
use codex_core::exec::ExecToolCallOutput;
|
||||
use codex_core::exec::SandboxType;
|
||||
use codex_core::exec::process_exec_tool_call;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
@@ -12,14 +13,20 @@ use codex_core::spawn::CODEX_SANDBOX_ENV_VAR;
|
||||
use tempfile::TempDir;
|
||||
use tokio::sync::Notify;
|
||||
|
||||
use codex_core::error::Result;
|
||||
|
||||
use codex_core::get_platform_sandbox;
|
||||
|
||||
async fn run_test_cmd(tmp: TempDir, cmd: Vec<&str>, should_be_ok: bool) {
|
||||
fn skip_test() -> bool {
|
||||
if std::env::var(CODEX_SANDBOX_ENV_VAR) == Ok("seatbelt".to_string()) {
|
||||
eprintln!("{CODEX_SANDBOX_ENV_VAR} is set to 'seatbelt', skipping test.");
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
async fn run_test_cmd(tmp: TempDir, cmd: Vec<&str>) -> Result<ExecToolCallOutput> {
|
||||
let sandbox_type = get_platform_sandbox().expect("should be able to get sandbox type");
|
||||
assert_eq!(sandbox_type, SandboxType::MacosSeatbelt);
|
||||
|
||||
@@ -35,31 +42,82 @@ async fn run_test_cmd(tmp: TempDir, cmd: Vec<&str>, should_be_ok: bool) {
|
||||
let ctrl_c = Arc::new(Notify::new());
|
||||
let policy = SandboxPolicy::new_read_only_policy();
|
||||
|
||||
let result = process_exec_tool_call(params, sandbox_type, ctrl_c, &policy, &None, None).await;
|
||||
|
||||
assert!(result.is_ok() == should_be_ok);
|
||||
process_exec_tool_call(params, sandbox_type, ctrl_c, &policy, &None, None).await
|
||||
}
|
||||
|
||||
/// Command succeeds with exit code 0 normally
|
||||
#[tokio::test]
|
||||
async fn exit_code_0_succeeds() {
|
||||
if skip_test() {
|
||||
return;
|
||||
}
|
||||
|
||||
let tmp = TempDir::new().expect("should be able to create temp dir");
|
||||
let cmd = vec!["echo", "hello"];
|
||||
|
||||
run_test_cmd(tmp, cmd, true).await
|
||||
let output = run_test_cmd(tmp, cmd).await.unwrap();
|
||||
assert_eq!(output.stdout.text, "hello\n");
|
||||
assert_eq!(output.stderr.text, "");
|
||||
assert_eq!(output.stdout.truncated_after_lines, None);
|
||||
}
|
||||
|
||||
/// Command succeeds with exit code 0 normally
|
||||
#[tokio::test]
|
||||
async fn truncates_output_lines() {
|
||||
if skip_test() {
|
||||
return;
|
||||
}
|
||||
|
||||
let tmp = TempDir::new().expect("should be able to create temp dir");
|
||||
let cmd = vec!["seq", "300"];
|
||||
|
||||
#[expect(clippy::unwrap_used)]
|
||||
let output = run_test_cmd(tmp, cmd).await.unwrap();
|
||||
|
||||
let expected_output = (1..=256)
|
||||
.map(|i| format!("{i}\n"))
|
||||
.collect::<Vec<_>>()
|
||||
.join("");
|
||||
assert_eq!(output.stdout.text, expected_output);
|
||||
assert_eq!(output.stdout.truncated_after_lines, Some(256));
|
||||
}
|
||||
|
||||
/// Command succeeds with exit code 0 normally
|
||||
#[tokio::test]
|
||||
async fn truncates_output_bytes() {
|
||||
if skip_test() {
|
||||
return;
|
||||
}
|
||||
|
||||
let tmp = TempDir::new().expect("should be able to create temp dir");
|
||||
// each line is 1000 bytes
|
||||
let cmd = vec!["bash", "-lc", "seq 15 | awk '{printf \"%-1000s\\n\", $0}'"];
|
||||
|
||||
let output = run_test_cmd(tmp, cmd).await.unwrap();
|
||||
|
||||
assert_eq!(output.stdout.text.len(), 10240);
|
||||
assert_eq!(output.stdout.truncated_after_lines, Some(10));
|
||||
}
|
||||
|
||||
/// Command not found returns exit code 127, this is not considered a sandbox error
|
||||
#[tokio::test]
|
||||
async fn exit_command_not_found_is_ok() {
|
||||
if skip_test() {
|
||||
return;
|
||||
}
|
||||
|
||||
let tmp = TempDir::new().expect("should be able to create temp dir");
|
||||
let cmd = vec!["/bin/bash", "-c", "nonexistent_command_12345"];
|
||||
run_test_cmd(tmp, cmd, true).await
|
||||
run_test_cmd(tmp, cmd).await.unwrap();
|
||||
}
|
||||
|
||||
/// Writing a file fails and should be considered a sandbox error
|
||||
#[tokio::test]
|
||||
async fn write_file_fails_as_sandbox_error() {
|
||||
if skip_test() {
|
||||
return;
|
||||
}
|
||||
|
||||
let tmp = TempDir::new().expect("should be able to create temp dir");
|
||||
let path = tmp.path().join("test.txt");
|
||||
let cmd = vec![
|
||||
@@ -67,5 +125,5 @@ async fn write_file_fails_as_sandbox_error() {
|
||||
path.to_str().expect("should be able to get path"),
|
||||
];
|
||||
|
||||
run_test_cmd(tmp, cmd, false).await;
|
||||
assert!(run_test_cmd(tmp, cmd).await.is_err());
|
||||
}
|
||||
|
||||
@@ -76,7 +76,7 @@ async fn test_exec_stdout_stream_events_echo() {
|
||||
};
|
||||
|
||||
assert_eq!(result.exit_code, 0);
|
||||
assert_eq!(result.stdout, "hello-world\n");
|
||||
assert_eq!(result.stdout.text, "hello-world\n");
|
||||
|
||||
let streamed = collect_stdout_events(rx);
|
||||
// We should have received at least the same contents (possibly in one chunk)
|
||||
@@ -128,8 +128,8 @@ async fn test_exec_stderr_stream_events_echo() {
|
||||
};
|
||||
|
||||
assert_eq!(result.exit_code, 0);
|
||||
assert_eq!(result.stdout, "");
|
||||
assert_eq!(result.stderr, "oops\n");
|
||||
assert_eq!(result.stdout.text, "");
|
||||
assert_eq!(result.stderr.text, "oops\n");
|
||||
|
||||
// Collect only stderr delta events
|
||||
let mut err = Vec::new();
|
||||
|
||||
Reference in New Issue
Block a user