include fractional portion of chunk that exceeds stdout/stderr limit (#497)
I saw cases where the first chunk of output from `ls -R` could be large
enough to exceed `MAX_OUTPUT_BYTES` or `MAX_OUTPUT_LINES`, in which case
the loop would exit early in `createTruncatingCollector()` such that
nothing was appended to the `chunks` array. As a result, the reported
`stdout` of `ls -R` would be empty.
I asked Codex to add logic to handle this edge case and write a unit
test. I used this as my test:
```
./codex-cli/dist/cli.js -q 'what is the output of `ls -R`'
```
Now it appears to include a ton of output, whereas before this change I saw:
```
{"type":"function_call_output","call_id":"call_a2QhVt7HRJYKjb3dIc8w1aBB","output":"{\"output\":\"\\n\\n[Output truncated: too many lines or bytes]\",\"metadata\":{\"exit_code\":0,\"duration_seconds\":0.5}}"}
```
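To make the fix concrete, here is a minimal sketch (mine, not part of the commit) that reproduces the scenario: a single chunk that blows past the byte limit. Before this change the whole chunk was discarded and `getString()` returned an empty string; with the partial-slice logic it returns the first `byteLimit` bytes. The relative import path assumes the script sits next to the new unit test under `codex-cli/tests/`.

```
import { PassThrough } from "stream";
import { once } from "events";
import { createTruncatingCollector } from "../src/utils/agent/sandbox/create-truncating-collector.js";

async function demo(): Promise<void> {
  const stream = new PassThrough();
  // A byte limit of 5 is smaller than the single 11-byte chunk written below.
  const collector = createTruncatingCollector(stream, 5, 100);
  stream.end(Buffer.from("hello world"));
  await once(stream, "end");
  // Prints "hello" true; before this change it printed "" true.
  console.log(JSON.stringify(collector.getString()), collector.hit);
}

demo();
```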
New file `codex-cli/src/utils/agent/sandbox/create-truncating-collector.ts` (`@@ -0,0 +1,78 @@`):

```
// Maximum output cap: either MAX_OUTPUT_LINES lines or MAX_OUTPUT_BYTES bytes,
// whichever limit is reached first.
const MAX_OUTPUT_BYTES = 1024 * 10; // 10 KB
const MAX_OUTPUT_LINES = 256;

/**
 * Creates a collector that accumulates data Buffers from a stream up to
 * specified byte and line limits. After either limit is exceeded, further
 * data is ignored.
 */
export function createTruncatingCollector(
  stream: NodeJS.ReadableStream,
  byteLimit: number = MAX_OUTPUT_BYTES,
  lineLimit: number = MAX_OUTPUT_LINES,
): {
  getString: () => string;
  hit: boolean;
} {
  const chunks: Array<Buffer> = [];
  let totalBytes = 0;
  let totalLines = 0;
  let hitLimit = false;

  stream?.on("data", (data: Buffer) => {
    if (hitLimit) {
      return;
    }
    const dataLength = data.length;
    let newlineCount = 0;
    for (let i = 0; i < dataLength; i++) {
      if (data[i] === 0x0a) {
        newlineCount++;
      }
    }
    // If entire chunk fits within byte and line limits, take it whole
    if (
      totalBytes + dataLength <= byteLimit &&
      totalLines + newlineCount <= lineLimit
    ) {
      chunks.push(data);
      totalBytes += dataLength;
      totalLines += newlineCount;
    } else {
      // Otherwise, take a partial slice up to the first limit breach
      const allowedBytes = byteLimit - totalBytes;
      const allowedLines = lineLimit - totalLines;
      let bytesTaken = 0;
      let linesSeen = 0;
      for (let i = 0; i < dataLength; i++) {
        // Stop if byte or line limit is reached
        if (bytesTaken === allowedBytes || linesSeen === allowedLines) {
          break;
        }
        const byte = data[i];
        if (byte === 0x0a) {
          linesSeen++;
        }
        bytesTaken++;
      }
      if (bytesTaken > 0) {
        chunks.push(data.slice(0, bytesTaken));
        totalBytes += bytesTaken;
        totalLines += linesSeen;
      }
      hitLimit = true;
    }
  });

  return {
    getString() {
      return Buffer.concat(chunks).toString("utf8");
    },
    /** True if either byte or line limit was exceeded */
    get hit(): boolean {
      return hitLimit;
    },
  };
}
```
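For orientation, here is a hedged sketch of how a collector like this gets attached to a spawned process. It is illustrative only: the command, the exit-code/stdout shape, and the warning string are taken from this page, not from the actual `exec()` implementation, which the next hunks modify.

```
import { spawn } from "child_process";
import { createTruncatingCollector } from "./create-truncating-collector";

// Illustrative wiring only; the commit's real exec() does the equivalent.
const child = spawn("ls", ["-R"]);
const stdoutCollector = createTruncatingCollector(child.stdout);
const stderrCollector = createTruncatingCollector(child.stderr);

child.on("close", (code) => {
  let stdout = stdoutCollector.getString();
  if (stdoutCollector.hit) {
    // Mirrors the truncation warning visible in the commit message above.
    stdout += "\n\n[Output truncated: too many lines or bytes]";
  }
  console.log({ exit_code: code, stdout, stderr: stderrCollector.getString() });
});
```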
The next two hunks modify the `exec` module in the same sandbox directory, dropping the inline collector and its constants in favor of an import of the new module:

```
@@ -9,14 +9,10 @@ import type {
 import { log } from "../../logger/log.js";
 import { adaptCommandForPlatform } from "../platform-commands.js";
+import { createTruncatingCollector } from "./create-truncating-collector";
 import { spawn } from "child_process";
 import * as os from "os";
 
-// Maximum output cap: either MAX_OUTPUT_LINES lines or MAX_OUTPUT_BYTES bytes,
-// whichever limit is reached first.
-const MAX_OUTPUT_BYTES = 1024 * 10; // 10 KB
-const MAX_OUTPUT_LINES = 256;
-
 /**
  * This function should never return a rejected promise: errors should be
  * mapped to a non-zero exit code and the error message should be in stderr.
@@ -204,49 +200,6 @@ export function exec(
   });
 }
-
-/**
- * Creates a collector that accumulates data Buffers from a stream up to
- * specified byte and line limits. After either limit is exceeded, further
- * data is ignored.
- */
-function createTruncatingCollector(
-  stream: NodeJS.ReadableStream,
-  byteLimit: number = MAX_OUTPUT_BYTES,
-  lineLimit: number = MAX_OUTPUT_LINES,
-) {
-  const chunks: Array<Buffer> = [];
-  let totalBytes = 0;
-  let totalLines = 0;
-  let hitLimit = false;
-
-  stream?.on("data", (data: Buffer) => {
-    if (hitLimit) {
-      return;
-    }
-    totalBytes += data.length;
-    for (let i = 0; i < data.length; i++) {
-      if (data[i] === 0x0a) {
-        totalLines++;
-      }
-    }
-    if (totalBytes <= byteLimit && totalLines <= lineLimit) {
-      chunks.push(data);
-    } else {
-      hitLimit = true;
-    }
-  });
-
-  return {
-    getString() {
-      return Buffer.concat(chunks).toString("utf8");
-    },
-    /** True if either byte or line limit was exceeded */
-    get hit(): boolean {
-      return hitLimit;
-    },
-  };
-}
 
 /**
  * Adds a truncation warnings to stdout and stderr, if appropriate.
  */
```
New test file `codex-cli/tests/create-truncating-collector.test.ts` (`@@ -0,0 +1,55 @@`):

```
import { PassThrough } from "stream";
import { once } from "events";
import { describe, it, expect } from "vitest";
import { createTruncatingCollector } from "../src/utils/agent/sandbox/create-truncating-collector.js";

describe("createTruncatingCollector", () => {
  it("collects data under limits without truncation", async () => {
    const stream = new PassThrough();
    const collector = createTruncatingCollector(stream, 100, 10);
    const data = "line1\nline2\n";
    stream.end(Buffer.from(data));
    await once(stream, "end");
    expect(collector.getString()).toBe(data);
    expect(collector.hit).toBe(false);
  });

  it("truncates data over byte limit", async () => {
    const stream = new PassThrough();
    const collector = createTruncatingCollector(stream, 5, 100);
    stream.end(Buffer.from("hello world"));
    await once(stream, "end");
    expect(collector.getString()).toBe("hello");
    expect(collector.hit).toBe(true);
  });

  it("truncates data over line limit", async () => {
    const stream = new PassThrough();
    const collector = createTruncatingCollector(stream, 1000, 2);
    const data = "a\nb\nc\nd\n";
    stream.end(Buffer.from(data));
    await once(stream, "end");
    expect(collector.getString()).toBe("a\nb\n");
    expect(collector.hit).toBe(true);
  });

  it("stops collecting after limit is hit across multiple writes", async () => {
    const stream = new PassThrough();
    const collector = createTruncatingCollector(stream, 10, 2);
    stream.write(Buffer.from("1\n"));
    stream.write(Buffer.from("2\n3\n4\n"));
    stream.end();
    await once(stream, "end");
    expect(collector.getString()).toBe("1\n2\n");
    expect(collector.hit).toBe(true);
  });

  it("handles zero limits", async () => {
    const stream = new PassThrough();
    const collector = createTruncatingCollector(stream, 0, 0);
    stream.end(Buffer.from("anything\n"));
    await once(stream, "end");
    expect(collector.getString()).toBe("");
    expect(collector.hit).toBe(true);
  });
});
```
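Assuming the repo's vitest setup (the test imports `vitest` directly), the new file can be run in isolation from `codex-cli/` with something like:

```
npx vitest run tests/create-truncating-collector.test.ts
```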