include fractional portion of chunk that exceeds stdout/stderr limit (#497)
I saw cases where the first chunk of output from `ls -R` could be large
enough to exceed `MAX_OUTPUT_BYTES` or `MAX_OUTPUT_LINES`, in which case
the loop would exit early in `createTruncatingCollector()` such that
nothing was appended to the `chunks` array. As a result, the reported
`stdout` of `ls -R` would be empty.
I asked Codex to add logic to handle this edge case and write a unit
test. I used this as my test:
```
./codex-cli/dist/cli.js -q 'what is the output of `ls -R`'
```
Now it appears to include a ton of output, whereas before this change I saw:
```
{"type":"function_call_output","call_id":"call_a2QhVt7HRJYKjb3dIc8w1aBB","output":"{\"output\":\"\\n\\n[Output truncated: too many lines or bytes]\",\"metadata\":{\"exit_code\":0,\"duration_seconds\":0.5}}"}
```
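To make the fix concrete, here is a minimal sketch (mine, not part of the commit) that reproduces the scenario: a single chunk that blows past the byte limit. Before this change the whole chunk was discarded and `getString()` returned an empty string; with the partial-slice logic it returns the first `byteLimit` bytes. The relative import path assumes the script sits next to the new unit test under `codex-cli/tests/`.

```
import { PassThrough } from "stream";
import { once } from "events";
import { createTruncatingCollector } from "../src/utils/agent/sandbox/create-truncating-collector.js";

async function demo(): Promise<void> {
  const stream = new PassThrough();
  // A byte limit of 5 is smaller than the single 11-byte chunk written below.
  const collector = createTruncatingCollector(stream, 5, 100);
  stream.end(Buffer.from("hello world"));
  await once(stream, "end");
  // Prints "hello" true; before this change it printed "" true.
  console.log(JSON.stringify(collector.getString()), collector.hit);
}

demo();
```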
New file `codex-cli/src/utils/agent/sandbox/create-truncating-collector.ts` (`@@ -0,0 +1,78 @@`):

```
// Maximum output cap: either MAX_OUTPUT_LINES lines or MAX_OUTPUT_BYTES bytes,
// whichever limit is reached first.
const MAX_OUTPUT_BYTES = 1024 * 10; // 10 KB
const MAX_OUTPUT_LINES = 256;

/**
 * Creates a collector that accumulates data Buffers from a stream up to
 * specified byte and line limits. After either limit is exceeded, further
 * data is ignored.
 */
export function createTruncatingCollector(
  stream: NodeJS.ReadableStream,
  byteLimit: number = MAX_OUTPUT_BYTES,
  lineLimit: number = MAX_OUTPUT_LINES,
): {
  getString: () => string;
  hit: boolean;
} {
  const chunks: Array<Buffer> = [];
  let totalBytes = 0;
  let totalLines = 0;
  let hitLimit = false;

  stream?.on("data", (data: Buffer) => {
    if (hitLimit) {
      return;
    }
    const dataLength = data.length;
    let newlineCount = 0;
    for (let i = 0; i < dataLength; i++) {
      if (data[i] === 0x0a) {
        newlineCount++;
      }
    }
    // If entire chunk fits within byte and line limits, take it whole
    if (
      totalBytes + dataLength <= byteLimit &&
      totalLines + newlineCount <= lineLimit
    ) {
      chunks.push(data);
      totalBytes += dataLength;
      totalLines += newlineCount;
    } else {
      // Otherwise, take a partial slice up to the first limit breach
      const allowedBytes = byteLimit - totalBytes;
      const allowedLines = lineLimit - totalLines;
      let bytesTaken = 0;
      let linesSeen = 0;
      for (let i = 0; i < dataLength; i++) {
        // Stop if byte or line limit is reached
        if (bytesTaken === allowedBytes || linesSeen === allowedLines) {
          break;
        }
        const byte = data[i];
        if (byte === 0x0a) {
          linesSeen++;
        }
        bytesTaken++;
      }
      if (bytesTaken > 0) {
        chunks.push(data.slice(0, bytesTaken));
        totalBytes += bytesTaken;
        totalLines += linesSeen;
      }
      hitLimit = true;
    }
  });

  return {
    getString() {
      return Buffer.concat(chunks).toString("utf8");
    },
    /** True if either byte or line limit was exceeded */
    get hit(): boolean {
      return hitLimit;
    },
  };
}
```
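For orientation, here is a hedged sketch of how a collector like this gets attached to a spawned process. It is illustrative only: the command, the exit-code/stdout shape, and the warning string are taken from this page, not from the actual `exec()` implementation, which the next hunks modify.

```
import { spawn } from "child_process";
import { createTruncatingCollector } from "./create-truncating-collector";

// Illustrative wiring only; the commit's real exec() does the equivalent.
const child = spawn("ls", ["-R"]);
const stdoutCollector = createTruncatingCollector(child.stdout);
const stderrCollector = createTruncatingCollector(child.stderr);

child.on("close", (code) => {
  let stdout = stdoutCollector.getString();
  if (stdoutCollector.hit) {
    // Mirrors the truncation warning visible in the commit message above.
    stdout += "\n\n[Output truncated: too many lines or bytes]";
  }
  console.log({ exit_code: code, stdout, stderr: stderrCollector.getString() });
});
```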
The next two hunks modify the `exec` module in the same sandbox directory, dropping the inline collector and its constants in favor of an import of the new module:

```
@@ -9,14 +9,10 @@ import type {
 import { log } from "../../logger/log.js";
 import { adaptCommandForPlatform } from "../platform-commands.js";
+import { createTruncatingCollector } from "./create-truncating-collector";
 import { spawn } from "child_process";
 import * as os from "os";
 
-// Maximum output cap: either MAX_OUTPUT_LINES lines or MAX_OUTPUT_BYTES bytes,
-// whichever limit is reached first.
-const MAX_OUTPUT_BYTES = 1024 * 10; // 10 KB
-const MAX_OUTPUT_LINES = 256;
-
 /**
  * This function should never return a rejected promise: errors should be
  * mapped to a non-zero exit code and the error message should be in stderr.
@@ -204,49 +200,6 @@ export function exec(
   });
 }
-
-/**
- * Creates a collector that accumulates data Buffers from a stream up to
- * specified byte and line limits. After either limit is exceeded, further
- * data is ignored.
- */
-function createTruncatingCollector(
-  stream: NodeJS.ReadableStream,
-  byteLimit: number = MAX_OUTPUT_BYTES,
-  lineLimit: number = MAX_OUTPUT_LINES,
-) {
-  const chunks: Array<Buffer> = [];
-  let totalBytes = 0;
-  let totalLines = 0;
-  let hitLimit = false;
-
-  stream?.on("data", (data: Buffer) => {
-    if (hitLimit) {
-      return;
-    }
-    totalBytes += data.length;
-    for (let i = 0; i < data.length; i++) {
-      if (data[i] === 0x0a) {
-        totalLines++;
-      }
-    }
-    if (totalBytes <= byteLimit && totalLines <= lineLimit) {
-      chunks.push(data);
-    } else {
-      hitLimit = true;
-    }
-  });
-
-  return {
-    getString() {
-      return Buffer.concat(chunks).toString("utf8");
-    },
-    /** True if either byte or line limit was exceeded */
-    get hit(): boolean {
-      return hitLimit;
-    },
-  };
-}
 
 /**
  * Adds a truncation warnings to stdout and stderr, if appropriate.
  */
```
New test file `codex-cli/tests/create-truncating-collector.test.ts` (`@@ -0,0 +1,55 @@`):

```
import { PassThrough } from "stream";
import { once } from "events";
import { describe, it, expect } from "vitest";
import { createTruncatingCollector } from "../src/utils/agent/sandbox/create-truncating-collector.js";

describe("createTruncatingCollector", () => {
  it("collects data under limits without truncation", async () => {
    const stream = new PassThrough();
    const collector = createTruncatingCollector(stream, 100, 10);
    const data = "line1\nline2\n";
    stream.end(Buffer.from(data));
    await once(stream, "end");
    expect(collector.getString()).toBe(data);
    expect(collector.hit).toBe(false);
  });

  it("truncates data over byte limit", async () => {
    const stream = new PassThrough();
    const collector = createTruncatingCollector(stream, 5, 100);
    stream.end(Buffer.from("hello world"));
    await once(stream, "end");
    expect(collector.getString()).toBe("hello");
    expect(collector.hit).toBe(true);
  });

  it("truncates data over line limit", async () => {
    const stream = new PassThrough();
    const collector = createTruncatingCollector(stream, 1000, 2);
    const data = "a\nb\nc\nd\n";
    stream.end(Buffer.from(data));
    await once(stream, "end");
    expect(collector.getString()).toBe("a\nb\n");
    expect(collector.hit).toBe(true);
  });

  it("stops collecting after limit is hit across multiple writes", async () => {
    const stream = new PassThrough();
    const collector = createTruncatingCollector(stream, 10, 2);
    stream.write(Buffer.from("1\n"));
    stream.write(Buffer.from("2\n3\n4\n"));
    stream.end();
    await once(stream, "end");
    expect(collector.getString()).toBe("1\n2\n");
    expect(collector.hit).toBe(true);
  });

  it("handles zero limits", async () => {
    const stream = new PassThrough();
    const collector = createTruncatingCollector(stream, 0, 0);
    stream.end(Buffer.from("anything\n"));
    await once(stream, "end");
    expect(collector.getString()).toBe("");
    expect(collector.hit).toBe(true);
  });
});
```
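Assuming the repo's vitest setup (the test imports `vitest` directly), the new file can be run in isolation from `codex-cli/` with something like:

```
npx vitest run tests/create-truncating-collector.test.ts
```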