From 3eba86a55353f6b6f0efee19704b8d11e683b4ad Mon Sep 17 00:00:00 2001
From: Michael Bolin
Date: Mon, 21 Apr 2025 19:06:03 -0700
Subject: [PATCH] include fractional portion of chunk that exceeds stdout/stderr limit (#497)

I saw cases where the first chunk of output from `ls -R` could be large
enough to exceed `MAX_OUTPUT_BYTES` or `MAX_OUTPUT_LINES`. In that case,
`createTruncatingCollector()` dropped the entire chunk rather than keeping
the portion that still fit under the limits, so nothing was appended to the
`chunks` array and the reported `stdout` of `ls -R` was empty.

I asked Codex to add logic to handle this edge case and write a unit test.
I used this as my test:

```
./codex-cli/dist/cli.js -q 'what is the output of `ls -R`'
```

Now the output includes the full recursive listing, whereas before this
change I saw:

```
{"type":"function_call_output","call_id":"call_a2QhVt7HRJYKjb3dIc8w1aBB","output":"{\"output\":\"\\n\\n[Output truncated: too many lines or bytes]\",\"metadata\":{\"exit_code\":0,\"duration_seconds\":0.5}}"}
```
---
 .../sandbox/create-truncating-collector.ts    | 78 +++++++++++++++++++
 codex-cli/src/utils/agent/sandbox/raw-exec.ts | 49 +-----------
 .../tests/create-truncating-collector.test.ts | 55 +++++++++++++
 3 files changed, 134 insertions(+), 48 deletions(-)
 create mode 100644 codex-cli/src/utils/agent/sandbox/create-truncating-collector.ts
 create mode 100644 codex-cli/tests/create-truncating-collector.test.ts
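For illustration, here is roughly the behavior this patch changes, using the
same `PassThrough` pattern as the new unit tests (the tiny limits below are
made up for the demo, not the real defaults):

```
import { PassThrough } from "stream";
import { createTruncatingCollector } from "./create-truncating-collector";

const stream = new PassThrough();
// Demo limits: 10 bytes, 2 lines.
const collector = createTruncatingCollector(stream, 10, 2);

// A single first chunk that already exceeds the byte limit.
stream.end(Buffer.from("0123456789abcdef\n"));

stream.on("end", () => {
  // Before this patch: "" (the whole chunk was dropped).
  // After this patch: "0123456789" (the fractional portion is kept).
  console.log(JSON.stringify(collector.getString()));
  console.log(collector.hit); // true
});
```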
diff --git a/codex-cli/src/utils/agent/sandbox/create-truncating-collector.ts b/codex-cli/src/utils/agent/sandbox/create-truncating-collector.ts
new file mode 100644
index 00000000..518d475c
--- /dev/null
+++ b/codex-cli/src/utils/agent/sandbox/create-truncating-collector.ts
@@ -0,0 +1,78 @@
+// Maximum output cap: either MAX_OUTPUT_LINES lines or MAX_OUTPUT_BYTES bytes,
+// whichever limit is reached first.
+const MAX_OUTPUT_BYTES = 1024 * 10; // 10 KB
+const MAX_OUTPUT_LINES = 256;
+
+/**
+ * Creates a collector that accumulates data Buffers from a stream up to
+ * specified byte and line limits. After either limit is exceeded, further
+ * data is ignored.
+ */
+export function createTruncatingCollector(
+  stream: NodeJS.ReadableStream,
+  byteLimit: number = MAX_OUTPUT_BYTES,
+  lineLimit: number = MAX_OUTPUT_LINES,
+): {
+  getString: () => string;
+  hit: boolean;
+} {
+  const chunks: Array<Buffer> = [];
+  let totalBytes = 0;
+  let totalLines = 0;
+  let hitLimit = false;
+
+  stream?.on("data", (data: Buffer) => {
+    if (hitLimit) {
+      return;
+    }
+    const dataLength = data.length;
+    let newlineCount = 0;
+    for (let i = 0; i < dataLength; i++) {
+      if (data[i] === 0x0a) {
+        newlineCount++;
+      }
+    }
+    // If the entire chunk fits within byte and line limits, take it whole.
+    if (
+      totalBytes + dataLength <= byteLimit &&
+      totalLines + newlineCount <= lineLimit
+    ) {
+      chunks.push(data);
+      totalBytes += dataLength;
+      totalLines += newlineCount;
+    } else {
+      // Otherwise, take a partial slice up to the first limit breach.
+      const allowedBytes = byteLimit - totalBytes;
+      const allowedLines = lineLimit - totalLines;
+      let bytesTaken = 0;
+      let linesSeen = 0;
+      for (let i = 0; i < dataLength; i++) {
+        // Stop if the byte or line limit is reached.
+        if (bytesTaken === allowedBytes || linesSeen === allowedLines) {
+          break;
+        }
+        const byte = data[i];
+        if (byte === 0x0a) {
+          linesSeen++;
+        }
+        bytesTaken++;
+      }
+      if (bytesTaken > 0) {
+        chunks.push(data.slice(0, bytesTaken));
+        totalBytes += bytesTaken;
+        totalLines += linesSeen;
+      }
+      hitLimit = true;
+    }
+  });
+
+  return {
+    getString() {
+      return Buffer.concat(chunks).toString("utf8");
+    },
+    /** True if either byte or line limit was exceeded */
+    get hit(): boolean {
+      return hitLimit;
+    },
+  };
+}
diff --git a/codex-cli/src/utils/agent/sandbox/raw-exec.ts b/codex-cli/src/utils/agent/sandbox/raw-exec.ts
index b3d1d8ec..b33feb85 100644
--- a/codex-cli/src/utils/agent/sandbox/raw-exec.ts
+++ b/codex-cli/src/utils/agent/sandbox/raw-exec.ts
@@ -9,14 +9,10 @@ import type {
 import { log } from "../../logger/log.js";
 import { adaptCommandForPlatform } from "../platform-commands.js";
+import { createTruncatingCollector } from "./create-truncating-collector";
 import { spawn } from "child_process";
 import * as os from "os";
 
-// Maximum output cap: either MAX_OUTPUT_LINES lines or MAX_OUTPUT_BYTES bytes,
-// whichever limit is reached first.
-const MAX_OUTPUT_BYTES = 1024 * 10; // 10 KB
-const MAX_OUTPUT_LINES = 256;
-
 /**
  * This function should never return a rejected promise: errors should be
  * mapped to a non-zero exit code and the error message should be in stderr.
@@ -204,49 +200,6 @@ export function exec(
   });
 }
 
-/**
- * Creates a collector that accumulates data Buffers from a stream up to
- * specified byte and line limits. After either limit is exceeded, further
- * data is ignored.
- */
-function createTruncatingCollector(
-  stream: NodeJS.ReadableStream,
-  byteLimit: number = MAX_OUTPUT_BYTES,
-  lineLimit: number = MAX_OUTPUT_LINES,
-) {
-  const chunks: Array<Buffer> = [];
-  let totalBytes = 0;
-  let totalLines = 0;
-  let hitLimit = false;
-
-  stream?.on("data", (data: Buffer) => {
-    if (hitLimit) {
-      return;
-    }
-    totalBytes += data.length;
-    for (let i = 0; i < data.length; i++) {
-      if (data[i] === 0x0a) {
-        totalLines++;
-      }
-    }
-    if (totalBytes <= byteLimit && totalLines <= lineLimit) {
-      chunks.push(data);
-    } else {
-      hitLimit = true;
-    }
-  });
-
-  return {
-    getString() {
-      return Buffer.concat(chunks).toString("utf8");
-    },
-    /** True if either byte or line limit was exceeded */
-    get hit(): boolean {
-      return hitLimit;
-    },
-  };
-}
-
 /**
  * Adds truncation warnings to stdout and stderr, if appropriate.
  */
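The diff above only shows the import moving into raw-exec.ts, so as a rough
sketch (the `spawn` wiring here is an assumption for illustration, not the
actual `exec()` implementation), a collector attaches to a child process
like this:

```
import { spawn } from "child_process";
import { createTruncatingCollector } from "./create-truncating-collector";

// Hypothetical wiring for illustration; the real call sites live inside
// exec() in raw-exec.ts and are not part of this diff.
const child = spawn("ls", ["-R"]);
const stdoutCollector = createTruncatingCollector(child.stdout!);
const stderrCollector = createTruncatingCollector(child.stderr!);

child.on("close", (exitCode) => {
  const stdout = stdoutCollector.getString();
  // If either limit was hit, a truncation warning can be appended to the
  // collected output before it is reported back.
  if (stdoutCollector.hit || stderrCollector.hit) {
    console.error("[Output truncated: too many lines or bytes]");
  }
  console.log({ exitCode, stdoutBytes: Buffer.byteLength(stdout) });
});
```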
diff --git a/codex-cli/tests/create-truncating-collector.test.ts b/codex-cli/tests/create-truncating-collector.test.ts
new file mode 100644
index 00000000..ad3dee55
--- /dev/null
+++ b/codex-cli/tests/create-truncating-collector.test.ts
@@ -0,0 +1,55 @@
+import { PassThrough } from "stream";
+import { once } from "events";
+import { describe, it, expect } from "vitest";
+import { createTruncatingCollector } from "../src/utils/agent/sandbox/create-truncating-collector.js";
+
+describe("createTruncatingCollector", () => {
+  it("collects data under limits without truncation", async () => {
+    const stream = new PassThrough();
+    const collector = createTruncatingCollector(stream, 100, 10);
+    const data = "line1\nline2\n";
+    stream.end(Buffer.from(data));
+    await once(stream, "end");
+    expect(collector.getString()).toBe(data);
+    expect(collector.hit).toBe(false);
+  });
+
+  it("truncates data over byte limit", async () => {
+    const stream = new PassThrough();
+    const collector = createTruncatingCollector(stream, 5, 100);
+    stream.end(Buffer.from("hello world"));
+    await once(stream, "end");
+    expect(collector.getString()).toBe("hello");
+    expect(collector.hit).toBe(true);
+  });
+
+  it("truncates data over line limit", async () => {
+    const stream = new PassThrough();
+    const collector = createTruncatingCollector(stream, 1000, 2);
+    const data = "a\nb\nc\nd\n";
+    stream.end(Buffer.from(data));
+    await once(stream, "end");
+    expect(collector.getString()).toBe("a\nb\n");
+    expect(collector.hit).toBe(true);
+  });
+
+  it("stops collecting after limit is hit across multiple writes", async () => {
+    const stream = new PassThrough();
+    const collector = createTruncatingCollector(stream, 10, 2);
+    stream.write(Buffer.from("1\n"));
+    stream.write(Buffer.from("2\n3\n4\n"));
+    stream.end();
+    await once(stream, "end");
+    expect(collector.getString()).toBe("1\n2\n");
+    expect(collector.hit).toBe(true);
+  });
+
+  it("handles zero limits", async () => {
+    const stream = new PassThrough();
+    const collector = createTruncatingCollector(stream, 0, 0);
+    stream.end(Buffer.from("anything\n"));
+    await once(stream, "end");
+    expect(collector.getString()).toBe("");
+    expect(collector.hit).toBe(true);
+  });
+});
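One boundary case worth noting, sketched here rather than covered in the
patch's test suite: a chunk that lands exactly on the byte limit passes the
`<=` check on the fast path, so it is kept whole and `hit` stays false.

```
import { PassThrough } from "stream";
import { createTruncatingCollector } from "../src/utils/agent/sandbox/create-truncating-collector.js";

const stream = new PassThrough();
const collector = createTruncatingCollector(stream, 5, 10);

// Exactly 5 bytes, zero newlines: totalBytes + dataLength <= byteLimit
// holds, so the whole chunk is kept and no truncation is recorded.
stream.end(Buffer.from("hello"));
stream.on("end", () => {
  console.log(collector.getString()); // "hello"
  console.log(collector.hit); // false
});
```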