From 3eba86a55353f6b6f0efee19704b8d11e683b4ad Mon Sep 17 00:00:00 2001
From: Michael Bolin
Date: Mon, 21 Apr 2025 19:06:03 -0700
Subject: [PATCH] include fractional portion of chunk that exceeds stdout/stderr limit (#497)

I saw cases where the first chunk of output from `ls -R` could be large
enough to exceed `MAX_OUTPUT_BYTES` or `MAX_OUTPUT_LINES`. In that case,
`createTruncatingCollector()` dropped the entire chunk rather than keeping
the portion that still fit under the limits, so nothing was appended to the
`chunks` array and the reported `stdout` of `ls -R` was empty.

I asked Codex to add logic to handle this edge case and write a unit test.
I used this as my test:

```
./codex-cli/dist/cli.js -q 'what is the output of `ls -R`'
```

Now the output includes the full recursive listing, whereas before this
change I saw:

```
{"type":"function_call_output","call_id":"call_a2QhVt7HRJYKjb3dIc8w1aBB","output":"{\"output\":\"\\n\\n[Output truncated: too many lines or bytes]\",\"metadata\":{\"exit_code\":0,\"duration_seconds\":0.5}}"}
```
---
 .../sandbox/create-truncating-collector.ts    | 78 +++++++++++++++++++
 codex-cli/src/utils/agent/sandbox/raw-exec.ts | 49 +-----------
 .../tests/create-truncating-collector.test.ts | 55 +++++++++++++
 3 files changed, 134 insertions(+), 48 deletions(-)
 create mode 100644 codex-cli/src/utils/agent/sandbox/create-truncating-collector.ts
 create mode 100644 codex-cli/tests/create-truncating-collector.test.ts
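For illustration, here is roughly the behavior this patch changes, using the
same `PassThrough` pattern as the new unit tests (the tiny limits below are
made up for the demo, not the real defaults):

```
import { PassThrough } from "stream";
import { createTruncatingCollector } from "./create-truncating-collector";

const stream = new PassThrough();
// Demo limits: 10 bytes, 2 lines.
const collector = createTruncatingCollector(stream, 10, 2);

// A single first chunk that already exceeds the byte limit.
stream.end(Buffer.from("0123456789abcdef\n"));

stream.on("end", () => {
  // Before this patch: "" (the whole chunk was dropped).
  // After this patch: "0123456789" (the fractional portion is kept).
  console.log(JSON.stringify(collector.getString()));
  console.log(collector.hit); // true
});
```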
diff --git a/codex-cli/src/utils/agent/sandbox/create-truncating-collector.ts b/codex-cli/src/utils/agent/sandbox/create-truncating-collector.ts
new file mode 100644
index 00000000..518d475c
--- /dev/null
+++ b/codex-cli/src/utils/agent/sandbox/create-truncating-collector.ts
@@ -0,0 +1,78 @@
+// Maximum output cap: either MAX_OUTPUT_LINES lines or MAX_OUTPUT_BYTES bytes,
+// whichever limit is reached first.
+const MAX_OUTPUT_BYTES = 1024 * 10; // 10 KB
+const MAX_OUTPUT_LINES = 256;
+
+/**
+ * Creates a collector that accumulates data Buffers from a stream up to
+ * specified byte and line limits. After either limit is exceeded, further
+ * data is ignored.
+ */
+export function createTruncatingCollector(
+  stream: NodeJS.ReadableStream,
+  byteLimit: number = MAX_OUTPUT_BYTES,
+  lineLimit: number = MAX_OUTPUT_LINES,
+): {
+  getString: () => string;
+  hit: boolean;
+} {
+  const chunks: Array<Buffer> = [];
+  let totalBytes = 0;
+  let totalLines = 0;
+  let hitLimit = false;
+
+  stream?.on("data", (data: Buffer) => {
+    if (hitLimit) {
+      return;
+    }
+    const dataLength = data.length;
+    let newlineCount = 0;
+    for (let i = 0; i < dataLength; i++) {
+      if (data[i] === 0x0a) {
+        newlineCount++;
+      }
+    }
+    // If the entire chunk fits within byte and line limits, take it whole.
+    if (
+      totalBytes + dataLength <= byteLimit &&
+      totalLines + newlineCount <= lineLimit
+    ) {
+      chunks.push(data);
+      totalBytes += dataLength;
+      totalLines += newlineCount;
+    } else {
+      // Otherwise, take a partial slice up to the first limit breach.
+      const allowedBytes = byteLimit - totalBytes;
+      const allowedLines = lineLimit - totalLines;
+      let bytesTaken = 0;
+      let linesSeen = 0;
+      for (let i = 0; i < dataLength; i++) {
+        // Stop if the byte or line limit is reached.
+        if (bytesTaken === allowedBytes || linesSeen === allowedLines) {
+          break;
+        }
+        const byte = data[i];
+        if (byte === 0x0a) {
+          linesSeen++;
+        }
+        bytesTaken++;
+      }
+      if (bytesTaken > 0) {
+        chunks.push(data.slice(0, bytesTaken));
+        totalBytes += bytesTaken;
+        totalLines += linesSeen;
+      }
+      hitLimit = true;
+    }
+  });
+
+  return {
+    getString() {
+      return Buffer.concat(chunks).toString("utf8");
+    },
+    /** True if either byte or line limit was exceeded */
+    get hit(): boolean {
+      return hitLimit;
+    },
+  };
+}
diff --git a/codex-cli/src/utils/agent/sandbox/raw-exec.ts b/codex-cli/src/utils/agent/sandbox/raw-exec.ts
index b3d1d8ec..b33feb85 100644
--- a/codex-cli/src/utils/agent/sandbox/raw-exec.ts
+++ b/codex-cli/src/utils/agent/sandbox/raw-exec.ts
@@ -9,14 +9,10 @@ import type {
 import { log } from "../../logger/log.js";
 import { adaptCommandForPlatform } from "../platform-commands.js";
+import { createTruncatingCollector } from "./create-truncating-collector";
 import { spawn } from "child_process";
 import * as os from "os";
 
-// Maximum output cap: either MAX_OUTPUT_LINES lines or MAX_OUTPUT_BYTES bytes,
-// whichever limit is reached first.
-const MAX_OUTPUT_BYTES = 1024 * 10; // 10 KB
-const MAX_OUTPUT_LINES = 256;
-
 /**
  * This function should never return a rejected promise: errors should be
  * mapped to a non-zero exit code and the error message should be in stderr.
@@ -204,49 +200,6 @@ export function exec(
   });
 }
 
-/**
- * Creates a collector that accumulates data Buffers from a stream up to
- * specified byte and line limits. After either limit is exceeded, further
- * data is ignored.
- */
-function createTruncatingCollector(
-  stream: NodeJS.ReadableStream,
-  byteLimit: number = MAX_OUTPUT_BYTES,
-  lineLimit: number = MAX_OUTPUT_LINES,
-) {
-  const chunks: Array<Buffer> = [];
-  let totalBytes = 0;
-  let totalLines = 0;
-  let hitLimit = false;
-
-  stream?.on("data", (data: Buffer) => {
-    if (hitLimit) {
-      return;
-    }
-    totalBytes += data.length;
-    for (let i = 0; i < data.length; i++) {
-      if (data[i] === 0x0a) {
-        totalLines++;
-      }
-    }
-    if (totalBytes <= byteLimit && totalLines <= lineLimit) {
-      chunks.push(data);
-    } else {
-      hitLimit = true;
-    }
-  });
-
-  return {
-    getString() {
-      return Buffer.concat(chunks).toString("utf8");
-    },
-    /** True if either byte or line limit was exceeded */
-    get hit(): boolean {
-      return hitLimit;
-    },
-  };
-}
-
 /**
  * Adds truncation warnings to stdout and stderr, if appropriate.
  */
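The diff above only shows the import moving into raw-exec.ts, so as a rough
sketch (the `spawn` wiring here is an assumption for illustration, not the
actual `exec()` implementation), a collector attaches to a child process
like this:

```
import { spawn } from "child_process";
import { createTruncatingCollector } from "./create-truncating-collector";

// Hypothetical wiring for illustration; the real call sites live inside
// exec() in raw-exec.ts and are not part of this diff.
const child = spawn("ls", ["-R"]);
const stdoutCollector = createTruncatingCollector(child.stdout!);
const stderrCollector = createTruncatingCollector(child.stderr!);

child.on("close", (exitCode) => {
  const stdout = stdoutCollector.getString();
  // If either limit was hit, a truncation warning can be appended to the
  // collected output before it is reported back.
  if (stdoutCollector.hit || stderrCollector.hit) {
    console.error("[Output truncated: too many lines or bytes]");
  }
  console.log({ exitCode, stdoutBytes: Buffer.byteLength(stdout) });
});
```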
diff --git a/codex-cli/tests/create-truncating-collector.test.ts b/codex-cli/tests/create-truncating-collector.test.ts
new file mode 100644
index 00000000..ad3dee55
--- /dev/null
+++ b/codex-cli/tests/create-truncating-collector.test.ts
@@ -0,0 +1,55 @@
+import { PassThrough } from "stream";
+import { once } from "events";
+import { describe, it, expect } from "vitest";
+import { createTruncatingCollector } from "../src/utils/agent/sandbox/create-truncating-collector.js";
+
+describe("createTruncatingCollector", () => {
+  it("collects data under limits without truncation", async () => {
+    const stream = new PassThrough();
+    const collector = createTruncatingCollector(stream, 100, 10);
+    const data = "line1\nline2\n";
+    stream.end(Buffer.from(data));
+    await once(stream, "end");
+    expect(collector.getString()).toBe(data);
+    expect(collector.hit).toBe(false);
+  });
+
+  it("truncates data over byte limit", async () => {
+    const stream = new PassThrough();
+    const collector = createTruncatingCollector(stream, 5, 100);
+    stream.end(Buffer.from("hello world"));
+    await once(stream, "end");
+    expect(collector.getString()).toBe("hello");
+    expect(collector.hit).toBe(true);
+  });
+
+  it("truncates data over line limit", async () => {
+    const stream = new PassThrough();
+    const collector = createTruncatingCollector(stream, 1000, 2);
+    const data = "a\nb\nc\nd\n";
+    stream.end(Buffer.from(data));
+    await once(stream, "end");
+    expect(collector.getString()).toBe("a\nb\n");
+    expect(collector.hit).toBe(true);
+  });
+
+  it("stops collecting after limit is hit across multiple writes", async () => {
+    const stream = new PassThrough();
+    const collector = createTruncatingCollector(stream, 10, 2);
+    stream.write(Buffer.from("1\n"));
+    stream.write(Buffer.from("2\n3\n4\n"));
+    stream.end();
+    await once(stream, "end");
+    expect(collector.getString()).toBe("1\n2\n");
+    expect(collector.hit).toBe(true);
+  });
+
+  it("handles zero limits", async () => {
+    const stream = new PassThrough();
+    const collector = createTruncatingCollector(stream, 0, 0);
+    stream.end(Buffer.from("anything\n"));
+    await once(stream, "end");
+    expect(collector.getString()).toBe("");
+    expect(collector.hit).toBe(true);
+  });
+});
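One boundary case worth noting, sketched here rather than covered in the
patch's test suite: a chunk that lands exactly on the byte limit passes the
`<=` check on the fast path, so it is kept whole and `hit` stays false.

```
import { PassThrough } from "stream";
import { createTruncatingCollector } from "../src/utils/agent/sandbox/create-truncating-collector.js";

const stream = new PassThrough();
const collector = createTruncatingCollector(stream, 5, 10);

// Exactly 5 bytes, zero newlines: totalBytes + dataLength <= byteLimit
// holds, so the whole chunk is kept and no truncation is recorded.
stream.end(Buffer.from("hello"));
stream.on("end", () => {
  console.log(collector.getString()); // "hello"
  console.log(collector.hit); // false
});
```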