Include output truncation message in tool call results (#2183)
To avoid model being confused about incomplete output.
This commit is contained in:
@@ -51,6 +51,7 @@ use crate::exec::ExecParams;
|
|||||||
use crate::exec::ExecToolCallOutput;
|
use crate::exec::ExecToolCallOutput;
|
||||||
use crate::exec::SandboxType;
|
use crate::exec::SandboxType;
|
||||||
use crate::exec::StdoutStream;
|
use crate::exec::StdoutStream;
|
||||||
|
use crate::exec::StreamOutput;
|
||||||
use crate::exec::process_exec_tool_call;
|
use crate::exec::process_exec_tool_call;
|
||||||
use crate::exec_env::create_env;
|
use crate::exec_env::create_env;
|
||||||
use crate::mcp_connection_manager::McpConnectionManager;
|
use crate::mcp_connection_manager::McpConnectionManager;
|
||||||
@@ -431,8 +432,8 @@ impl Session {
|
|||||||
// Because stdout and stderr could each be up to 100 KiB, we send
|
// Because stdout and stderr could each be up to 100 KiB, we send
|
||||||
// truncated versions.
|
// truncated versions.
|
||||||
const MAX_STREAM_OUTPUT: usize = 5 * 1024; // 5KiB
|
const MAX_STREAM_OUTPUT: usize = 5 * 1024; // 5KiB
|
||||||
let stdout = stdout.chars().take(MAX_STREAM_OUTPUT).collect();
|
let stdout = stdout.text.chars().take(MAX_STREAM_OUTPUT).collect();
|
||||||
let stderr = stderr.chars().take(MAX_STREAM_OUTPUT).collect();
|
let stderr = stderr.text.chars().take(MAX_STREAM_OUTPUT).collect();
|
||||||
|
|
||||||
let msg = if is_apply_patch {
|
let msg = if is_apply_patch {
|
||||||
EventMsg::PatchApplyEnd(PatchApplyEndEvent {
|
EventMsg::PatchApplyEnd(PatchApplyEndEvent {
|
||||||
@@ -504,8 +505,8 @@ impl Session {
|
|||||||
Err(e) => {
|
Err(e) => {
|
||||||
output_stderr = ExecToolCallOutput {
|
output_stderr = ExecToolCallOutput {
|
||||||
exit_code: -1,
|
exit_code: -1,
|
||||||
stdout: String::new(),
|
stdout: StreamOutput::new(String::new()),
|
||||||
stderr: get_error_message_ui(e),
|
stderr: StreamOutput::new(get_error_message_ui(e)),
|
||||||
duration: Duration::default(),
|
duration: Duration::default(),
|
||||||
};
|
};
|
||||||
&output_stderr
|
&output_stderr
|
||||||
@@ -1977,19 +1978,10 @@ async fn handle_container_exec_with_params(
|
|||||||
|
|
||||||
match output_result {
|
match output_result {
|
||||||
Ok(output) => {
|
Ok(output) => {
|
||||||
let ExecToolCallOutput {
|
let ExecToolCallOutput { exit_code, .. } = &output;
|
||||||
exit_code,
|
|
||||||
stdout,
|
|
||||||
stderr,
|
|
||||||
duration,
|
|
||||||
} = &output;
|
|
||||||
|
|
||||||
let is_success = *exit_code == 0;
|
let is_success = *exit_code == 0;
|
||||||
let content = format_exec_output(
|
let content = format_exec_output(&output);
|
||||||
if is_success { stdout } else { stderr },
|
|
||||||
*exit_code,
|
|
||||||
*duration,
|
|
||||||
);
|
|
||||||
ResponseInputItem::FunctionCallOutput {
|
ResponseInputItem::FunctionCallOutput {
|
||||||
call_id: call_id.clone(),
|
call_id: call_id.clone(),
|
||||||
output: FunctionCallOutputPayload {
|
output: FunctionCallOutputPayload {
|
||||||
@@ -2118,19 +2110,10 @@ async fn handle_sandbox_error(
|
|||||||
|
|
||||||
match retry_output_result {
|
match retry_output_result {
|
||||||
Ok(retry_output) => {
|
Ok(retry_output) => {
|
||||||
let ExecToolCallOutput {
|
let ExecToolCallOutput { exit_code, .. } = &retry_output;
|
||||||
exit_code,
|
|
||||||
stdout,
|
|
||||||
stderr,
|
|
||||||
duration,
|
|
||||||
} = &retry_output;
|
|
||||||
|
|
||||||
let is_success = *exit_code == 0;
|
let is_success = *exit_code == 0;
|
||||||
let content = format_exec_output(
|
let content = format_exec_output(&retry_output);
|
||||||
if is_success { stdout } else { stderr },
|
|
||||||
*exit_code,
|
|
||||||
*duration,
|
|
||||||
);
|
|
||||||
|
|
||||||
ResponseInputItem::FunctionCallOutput {
|
ResponseInputItem::FunctionCallOutput {
|
||||||
call_id: call_id.clone(),
|
call_id: call_id.clone(),
|
||||||
@@ -2163,7 +2146,14 @@ async fn handle_sandbox_error(
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Exec output is a pre-serialized JSON payload
|
/// Exec output is a pre-serialized JSON payload
|
||||||
fn format_exec_output(output: &str, exit_code: i32, duration: Duration) -> String {
|
fn format_exec_output(exec_output: &ExecToolCallOutput) -> String {
|
||||||
|
let ExecToolCallOutput {
|
||||||
|
exit_code,
|
||||||
|
stdout,
|
||||||
|
stderr,
|
||||||
|
duration,
|
||||||
|
} = exec_output;
|
||||||
|
|
||||||
#[derive(Serialize)]
|
#[derive(Serialize)]
|
||||||
struct ExecMetadata {
|
struct ExecMetadata {
|
||||||
exit_code: i32,
|
exit_code: i32,
|
||||||
@@ -2179,10 +2169,20 @@ fn format_exec_output(output: &str, exit_code: i32, duration: Duration) -> Strin
|
|||||||
// round to 1 decimal place
|
// round to 1 decimal place
|
||||||
let duration_seconds = ((duration.as_secs_f32()) * 10.0).round() / 10.0;
|
let duration_seconds = ((duration.as_secs_f32()) * 10.0).round() / 10.0;
|
||||||
|
|
||||||
|
let is_success = *exit_code == 0;
|
||||||
|
let output = if is_success { stdout } else { stderr };
|
||||||
|
|
||||||
|
let mut formatted_output = output.text.clone();
|
||||||
|
if let Some(truncated_after_lines) = output.truncated_after_lines {
|
||||||
|
formatted_output.push_str(&format!(
|
||||||
|
"\n\n[Output truncated after {truncated_after_lines} lines: too many lines or bytes.]",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
let payload = ExecOutput {
|
let payload = ExecOutput {
|
||||||
output,
|
output: &formatted_output,
|
||||||
metadata: ExecMetadata {
|
metadata: ExecMetadata {
|
||||||
exit_code,
|
exit_code: *exit_code,
|
||||||
duration_seconds,
|
duration_seconds,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -130,8 +130,8 @@ pub async fn process_exec_tool_call(
|
|||||||
let duration = start.elapsed();
|
let duration = start.elapsed();
|
||||||
match raw_output_result {
|
match raw_output_result {
|
||||||
Ok(raw_output) => {
|
Ok(raw_output) => {
|
||||||
let stdout = String::from_utf8_lossy(&raw_output.stdout).to_string();
|
let stdout = raw_output.stdout.from_utf8_lossy();
|
||||||
let stderr = String::from_utf8_lossy(&raw_output.stderr).to_string();
|
let stderr = raw_output.stderr.from_utf8_lossy();
|
||||||
|
|
||||||
#[cfg(target_family = "unix")]
|
#[cfg(target_family = "unix")]
|
||||||
match raw_output.exit_status.signal() {
|
match raw_output.exit_status.signal() {
|
||||||
@@ -146,7 +146,9 @@ pub async fn process_exec_tool_call(
|
|||||||
|
|
||||||
if exit_code != 0 && is_likely_sandbox_denied(sandbox_type, exit_code) {
|
if exit_code != 0 && is_likely_sandbox_denied(sandbox_type, exit_code) {
|
||||||
return Err(CodexErr::Sandbox(SandboxErr::Denied(
|
return Err(CodexErr::Sandbox(SandboxErr::Denied(
|
||||||
exit_code, stdout, stderr,
|
exit_code,
|
||||||
|
stdout.text,
|
||||||
|
stderr.text,
|
||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -243,18 +245,41 @@ fn is_likely_sandbox_denied(sandbox_type: SandboxType, exit_code: i32) -> bool {
|
|||||||
true
|
true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct StreamOutput<T> {
|
||||||
|
pub text: T,
|
||||||
|
pub truncated_after_lines: Option<u32>,
|
||||||
|
}
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct RawExecToolCallOutput {
|
pub struct RawExecToolCallOutput {
|
||||||
pub exit_status: ExitStatus,
|
pub exit_status: ExitStatus,
|
||||||
pub stdout: Vec<u8>,
|
pub stdout: StreamOutput<Vec<u8>>,
|
||||||
pub stderr: Vec<u8>,
|
pub stderr: StreamOutput<Vec<u8>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl StreamOutput<String> {
|
||||||
|
pub fn new(text: String) -> Self {
|
||||||
|
Self {
|
||||||
|
text,
|
||||||
|
truncated_after_lines: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl StreamOutput<Vec<u8>> {
|
||||||
|
pub fn from_utf8_lossy(&self) -> StreamOutput<String> {
|
||||||
|
StreamOutput {
|
||||||
|
text: String::from_utf8_lossy(&self.text).to_string(),
|
||||||
|
truncated_after_lines: self.truncated_after_lines,
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct ExecToolCallOutput {
|
pub struct ExecToolCallOutput {
|
||||||
pub exit_code: i32,
|
pub exit_code: i32,
|
||||||
pub stdout: String,
|
pub stdout: StreamOutput<String>,
|
||||||
pub stderr: String,
|
pub stderr: StreamOutput<String>,
|
||||||
pub duration: Duration,
|
pub duration: Duration,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -363,7 +388,7 @@ async fn read_capped<R: AsyncRead + Unpin + Send + 'static>(
|
|||||||
max_lines: usize,
|
max_lines: usize,
|
||||||
stream: Option<StdoutStream>,
|
stream: Option<StdoutStream>,
|
||||||
is_stderr: bool,
|
is_stderr: bool,
|
||||||
) -> io::Result<Vec<u8>> {
|
) -> io::Result<StreamOutput<Vec<u8>>> {
|
||||||
let mut buf = Vec::with_capacity(max_output.min(8 * 1024));
|
let mut buf = Vec::with_capacity(max_output.min(8 * 1024));
|
||||||
let mut tmp = [0u8; 8192];
|
let mut tmp = [0u8; 8192];
|
||||||
|
|
||||||
@@ -413,7 +438,16 @@ async fn read_capped<R: AsyncRead + Unpin + Send + 'static>(
|
|||||||
// Continue reading to EOF to avoid back-pressure, but discard once caps are hit.
|
// Continue reading to EOF to avoid back-pressure, but discard once caps are hit.
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(buf)
|
let truncated = remaining_lines == 0 || remaining_bytes == 0;
|
||||||
|
|
||||||
|
Ok(StreamOutput {
|
||||||
|
text: buf,
|
||||||
|
truncated_after_lines: if truncated {
|
||||||
|
Some((max_lines - remaining_lines) as u32)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
},
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(unix)]
|
#[cfg(unix)]
|
||||||
|
|||||||
@@ -230,7 +230,7 @@ mod tests {
|
|||||||
assert_eq!(output.exit_code, 0, "input: {input:?} output: {output:?}");
|
assert_eq!(output.exit_code, 0, "input: {input:?} output: {output:?}");
|
||||||
if let Some(expected) = expected_output {
|
if let Some(expected) = expected_output {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
output.stdout, expected,
|
output.stdout.text, expected,
|
||||||
"input: {input:?} output: {output:?}"
|
"input: {input:?} output: {output:?}"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,10 +1,11 @@
|
|||||||
#![cfg(target_os = "macos")]
|
#![cfg(target_os = "macos")]
|
||||||
#![expect(clippy::expect_used)]
|
#![expect(clippy::unwrap_used, clippy::expect_used)]
|
||||||
|
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use codex_core::exec::ExecParams;
|
use codex_core::exec::ExecParams;
|
||||||
|
use codex_core::exec::ExecToolCallOutput;
|
||||||
use codex_core::exec::SandboxType;
|
use codex_core::exec::SandboxType;
|
||||||
use codex_core::exec::process_exec_tool_call;
|
use codex_core::exec::process_exec_tool_call;
|
||||||
use codex_core::protocol::SandboxPolicy;
|
use codex_core::protocol::SandboxPolicy;
|
||||||
@@ -12,14 +13,20 @@ use codex_core::spawn::CODEX_SANDBOX_ENV_VAR;
|
|||||||
use tempfile::TempDir;
|
use tempfile::TempDir;
|
||||||
use tokio::sync::Notify;
|
use tokio::sync::Notify;
|
||||||
|
|
||||||
|
use codex_core::error::Result;
|
||||||
|
|
||||||
use codex_core::get_platform_sandbox;
|
use codex_core::get_platform_sandbox;
|
||||||
|
|
||||||
async fn run_test_cmd(tmp: TempDir, cmd: Vec<&str>, should_be_ok: bool) {
|
fn skip_test() -> bool {
|
||||||
if std::env::var(CODEX_SANDBOX_ENV_VAR) == Ok("seatbelt".to_string()) {
|
if std::env::var(CODEX_SANDBOX_ENV_VAR) == Ok("seatbelt".to_string()) {
|
||||||
eprintln!("{CODEX_SANDBOX_ENV_VAR} is set to 'seatbelt', skipping test.");
|
eprintln!("{CODEX_SANDBOX_ENV_VAR} is set to 'seatbelt', skipping test.");
|
||||||
return;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn run_test_cmd(tmp: TempDir, cmd: Vec<&str>) -> Result<ExecToolCallOutput> {
|
||||||
let sandbox_type = get_platform_sandbox().expect("should be able to get sandbox type");
|
let sandbox_type = get_platform_sandbox().expect("should be able to get sandbox type");
|
||||||
assert_eq!(sandbox_type, SandboxType::MacosSeatbelt);
|
assert_eq!(sandbox_type, SandboxType::MacosSeatbelt);
|
||||||
|
|
||||||
@@ -35,31 +42,82 @@ async fn run_test_cmd(tmp: TempDir, cmd: Vec<&str>, should_be_ok: bool) {
|
|||||||
let ctrl_c = Arc::new(Notify::new());
|
let ctrl_c = Arc::new(Notify::new());
|
||||||
let policy = SandboxPolicy::new_read_only_policy();
|
let policy = SandboxPolicy::new_read_only_policy();
|
||||||
|
|
||||||
let result = process_exec_tool_call(params, sandbox_type, ctrl_c, &policy, &None, None).await;
|
process_exec_tool_call(params, sandbox_type, ctrl_c, &policy, &None, None).await
|
||||||
|
|
||||||
assert!(result.is_ok() == should_be_ok);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Command succeeds with exit code 0 normally
|
/// Command succeeds with exit code 0 normally
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn exit_code_0_succeeds() {
|
async fn exit_code_0_succeeds() {
|
||||||
|
if skip_test() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
let tmp = TempDir::new().expect("should be able to create temp dir");
|
let tmp = TempDir::new().expect("should be able to create temp dir");
|
||||||
let cmd = vec!["echo", "hello"];
|
let cmd = vec!["echo", "hello"];
|
||||||
|
|
||||||
run_test_cmd(tmp, cmd, true).await
|
let output = run_test_cmd(tmp, cmd).await.unwrap();
|
||||||
|
assert_eq!(output.stdout.text, "hello\n");
|
||||||
|
assert_eq!(output.stderr.text, "");
|
||||||
|
assert_eq!(output.stdout.truncated_after_lines, None);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Command succeeds with exit code 0 normally
|
||||||
|
#[tokio::test]
|
||||||
|
async fn truncates_output_lines() {
|
||||||
|
if skip_test() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
let tmp = TempDir::new().expect("should be able to create temp dir");
|
||||||
|
let cmd = vec!["seq", "300"];
|
||||||
|
|
||||||
|
#[expect(clippy::unwrap_used)]
|
||||||
|
let output = run_test_cmd(tmp, cmd).await.unwrap();
|
||||||
|
|
||||||
|
let expected_output = (1..=256)
|
||||||
|
.map(|i| format!("{i}\n"))
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.join("");
|
||||||
|
assert_eq!(output.stdout.text, expected_output);
|
||||||
|
assert_eq!(output.stdout.truncated_after_lines, Some(256));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Command succeeds with exit code 0 normally
|
||||||
|
#[tokio::test]
|
||||||
|
async fn truncates_output_bytes() {
|
||||||
|
if skip_test() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
let tmp = TempDir::new().expect("should be able to create temp dir");
|
||||||
|
// each line is 1000 bytes
|
||||||
|
let cmd = vec!["bash", "-lc", "seq 15 | awk '{printf \"%-1000s\\n\", $0}'"];
|
||||||
|
|
||||||
|
let output = run_test_cmd(tmp, cmd).await.unwrap();
|
||||||
|
|
||||||
|
assert_eq!(output.stdout.text.len(), 10240);
|
||||||
|
assert_eq!(output.stdout.truncated_after_lines, Some(10));
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Command not found returns exit code 127, this is not considered a sandbox error
|
/// Command not found returns exit code 127, this is not considered a sandbox error
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn exit_command_not_found_is_ok() {
|
async fn exit_command_not_found_is_ok() {
|
||||||
|
if skip_test() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
let tmp = TempDir::new().expect("should be able to create temp dir");
|
let tmp = TempDir::new().expect("should be able to create temp dir");
|
||||||
let cmd = vec!["/bin/bash", "-c", "nonexistent_command_12345"];
|
let cmd = vec!["/bin/bash", "-c", "nonexistent_command_12345"];
|
||||||
run_test_cmd(tmp, cmd, true).await
|
run_test_cmd(tmp, cmd).await.unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Writing a file fails and should be considered a sandbox error
|
/// Writing a file fails and should be considered a sandbox error
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn write_file_fails_as_sandbox_error() {
|
async fn write_file_fails_as_sandbox_error() {
|
||||||
|
if skip_test() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
let tmp = TempDir::new().expect("should be able to create temp dir");
|
let tmp = TempDir::new().expect("should be able to create temp dir");
|
||||||
let path = tmp.path().join("test.txt");
|
let path = tmp.path().join("test.txt");
|
||||||
let cmd = vec![
|
let cmd = vec![
|
||||||
@@ -67,5 +125,5 @@ async fn write_file_fails_as_sandbox_error() {
|
|||||||
path.to_str().expect("should be able to get path"),
|
path.to_str().expect("should be able to get path"),
|
||||||
];
|
];
|
||||||
|
|
||||||
run_test_cmd(tmp, cmd, false).await;
|
assert!(run_test_cmd(tmp, cmd).await.is_err());
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -76,7 +76,7 @@ async fn test_exec_stdout_stream_events_echo() {
|
|||||||
};
|
};
|
||||||
|
|
||||||
assert_eq!(result.exit_code, 0);
|
assert_eq!(result.exit_code, 0);
|
||||||
assert_eq!(result.stdout, "hello-world\n");
|
assert_eq!(result.stdout.text, "hello-world\n");
|
||||||
|
|
||||||
let streamed = collect_stdout_events(rx);
|
let streamed = collect_stdout_events(rx);
|
||||||
// We should have received at least the same contents (possibly in one chunk)
|
// We should have received at least the same contents (possibly in one chunk)
|
||||||
@@ -128,8 +128,8 @@ async fn test_exec_stderr_stream_events_echo() {
|
|||||||
};
|
};
|
||||||
|
|
||||||
assert_eq!(result.exit_code, 0);
|
assert_eq!(result.exit_code, 0);
|
||||||
assert_eq!(result.stdout, "");
|
assert_eq!(result.stdout.text, "");
|
||||||
assert_eq!(result.stderr, "oops\n");
|
assert_eq!(result.stderr.text, "oops\n");
|
||||||
|
|
||||||
// Collect only stderr delta events
|
// Collect only stderr delta events
|
||||||
let mut err = Vec::new();
|
let mut err = Vec::new();
|
||||||
|
|||||||
@@ -72,8 +72,8 @@ async fn run_cmd(cmd: &[&str], writable_roots: &[PathBuf], timeout_ms: u64) {
|
|||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
if res.exit_code != 0 {
|
if res.exit_code != 0 {
|
||||||
println!("stdout:\n{}", res.stdout);
|
println!("stdout:\n{}", res.stdout.text);
|
||||||
println!("stderr:\n{}", res.stderr);
|
println!("stderr:\n{}", res.stderr.text);
|
||||||
panic!("exit code: {}", res.exit_code);
|
panic!("exit code: {}", res.exit_code);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -164,7 +164,7 @@ async fn assert_network_blocked(cmd: &[&str]) {
|
|||||||
.await;
|
.await;
|
||||||
|
|
||||||
let (exit_code, stdout, stderr) = match result {
|
let (exit_code, stdout, stderr) = match result {
|
||||||
Ok(output) => (output.exit_code, output.stdout, output.stderr),
|
Ok(output) => (output.exit_code, output.stdout.text, output.stderr.text),
|
||||||
Err(CodexErr::Sandbox(SandboxErr::Denied(exit_code, stdout, stderr))) => {
|
Err(CodexErr::Sandbox(SandboxErr::Denied(exit_code, stdout, stderr))) => {
|
||||||
(exit_code, stdout, stderr)
|
(exit_code, stdout, stderr)
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user