include fractional portion of chunk that exceeds stdout/stderr limit (#497)
I saw cases where the first chunk of output from `ls -R` was large enough
on its own to exceed `MAX_OUTPUT_BYTES` or `MAX_OUTPUT_LINES`. Because
`createTruncatingCollector()` only appended a chunk when the running totals
were still within both limits, it flagged the limit as hit without pushing
anything onto the `chunks` array, so the reported `stdout` of `ls -R` was
empty.
I asked Codex to add logic to handle this edge case and write a unit
test. I used this as my test:
```
./codex-cli/dist/cli.js -q 'what is the output of `ls -R`'
```
Now it appears to include a ton of output, whereas before this change I
saw:
```
{"type":"function_call_output","call_id":"call_a2QhVt7HRJYKjb3dIc8w1aBB","output":"{\"output\":\"\\n\\n[Output truncated: too many lines or bytes]\",\"metadata\":{\"exit_code\":0,\"duration_seconds\":0.5}}"}
```
New file `create-truncating-collector.ts` (`@@ -0,0 +1,78 @@`):

```ts
// Maximum output cap: either MAX_OUTPUT_LINES lines or MAX_OUTPUT_BYTES bytes,
// whichever limit is reached first.
const MAX_OUTPUT_BYTES = 1024 * 10; // 10 KB
const MAX_OUTPUT_LINES = 256;

/**
 * Creates a collector that accumulates data Buffers from a stream up to
 * specified byte and line limits. After either limit is exceeded, further
 * data is ignored.
 */
export function createTruncatingCollector(
  stream: NodeJS.ReadableStream,
  byteLimit: number = MAX_OUTPUT_BYTES,
  lineLimit: number = MAX_OUTPUT_LINES,
): {
  getString: () => string;
  hit: boolean;
} {
  const chunks: Array<Buffer> = [];
  let totalBytes = 0;
  let totalLines = 0;
  let hitLimit = false;

  stream?.on("data", (data: Buffer) => {
    if (hitLimit) {
      return;
    }
    const dataLength = data.length;
    let newlineCount = 0;
    for (let i = 0; i < dataLength; i++) {
      if (data[i] === 0x0a) {
        newlineCount++;
      }
    }
    // If entire chunk fits within byte and line limits, take it whole
    if (
      totalBytes + dataLength <= byteLimit &&
      totalLines + newlineCount <= lineLimit
    ) {
      chunks.push(data);
      totalBytes += dataLength;
      totalLines += newlineCount;
    } else {
      // Otherwise, take a partial slice up to the first limit breach
      const allowedBytes = byteLimit - totalBytes;
      const allowedLines = lineLimit - totalLines;
      let bytesTaken = 0;
      let linesSeen = 0;
      for (let i = 0; i < dataLength; i++) {
        // Stop if byte or line limit is reached
        if (bytesTaken === allowedBytes || linesSeen === allowedLines) {
          break;
        }
        const byte = data[i];
        if (byte === 0x0a) {
          linesSeen++;
        }
        bytesTaken++;
      }
      if (bytesTaken > 0) {
        chunks.push(data.slice(0, bytesTaken));
        totalBytes += bytesTaken;
        totalLines += linesSeen;
      }
      hitLimit = true;
    }
  });

  return {
    getString() {
      return Buffer.concat(chunks).toString("utf8");
    },
    /** True if either byte or line limit was exceeded */
    get hit(): boolean {
      return hitLimit;
    },
  };
}
```
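As a quick illustration of the edge case the fix covers, here is a minimal sketch that drives the collector by emitting a `data` event by hand (the tiny limits and the input string are made up for the example; the import path assumes the file above):

```ts
import { Readable } from "stream";
import { createTruncatingCollector } from "./create-truncating-collector";

// A bare Readable that we drive manually via emit("data", ...).
const stream = new Readable({ read() {} });

// Deliberately tiny limits for the example: 8 bytes, 2 lines.
const collector = createTruncatingCollector(stream, 8, 2);

// A single 17-byte chunk that exceeds the byte limit on its own.
// The old collector would have dropped it entirely; the new one
// keeps the first 8 bytes.
stream.emit("data", Buffer.from("abcdefghij\nklmno\n"));

console.log(collector.getString()); // "abcdefgh"
console.log(collector.hit); // true
```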
The old inline collector is removed from the file that defines `exec()`, which now imports the shared one:

```diff
@@ -9,14 +9,10 @@ import type {
 
 import { log } from "../../logger/log.js";
 import { adaptCommandForPlatform } from "../platform-commands.js";
+import { createTruncatingCollector } from "./create-truncating-collector";
 import { spawn } from "child_process";
 import * as os from "os";
 
-// Maximum output cap: either MAX_OUTPUT_LINES lines or MAX_OUTPUT_BYTES bytes,
-// whichever limit is reached first.
-const MAX_OUTPUT_BYTES = 1024 * 10; // 10 KB
-const MAX_OUTPUT_LINES = 256;
-
 /**
  * This function should never return a rejected promise: errors should be
  * mapped to a non-zero exit code and the error message should be in stderr.
@@ -204,49 +200,6 @@ export function exec(
   });
 }
 
-/**
- * Creates a collector that accumulates data Buffers from a stream up to
- * specified byte and line limits. After either limit is exceeded, further
- * data is ignored.
- */
-function createTruncatingCollector(
-  stream: NodeJS.ReadableStream,
-  byteLimit: number = MAX_OUTPUT_BYTES,
-  lineLimit: number = MAX_OUTPUT_LINES,
-) {
-  const chunks: Array<Buffer> = [];
-  let totalBytes = 0;
-  let totalLines = 0;
-  let hitLimit = false;
-
-  stream?.on("data", (data: Buffer) => {
-    if (hitLimit) {
-      return;
-    }
-    totalBytes += data.length;
-    for (let i = 0; i < data.length; i++) {
-      if (data[i] === 0x0a) {
-        totalLines++;
-      }
-    }
-    if (totalBytes <= byteLimit && totalLines <= lineLimit) {
-      chunks.push(data);
-    } else {
-      hitLimit = true;
-    }
-  });
-
-  return {
-    getString() {
-      return Buffer.concat(chunks).toString("utf8");
-    },
-    /** True if either byte or line limit was exceeded */
-    get hit(): boolean {
-      return hitLimit;
-    },
-  };
-}
-
 /**
  * Adds truncation warnings to stdout and stderr, if appropriate.
  */
```
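For context, a rough sketch of how the collector plugs into a `spawn`-based exec path (hypothetical wiring based on the imports above; the warning string matches the truncation notice in the quiet-mode log, but the real `exec()` may assemble its result differently):

```ts
import { spawn } from "child_process";
import { createTruncatingCollector } from "./create-truncating-collector";

const child = spawn("ls", ["-R"]);
const stdoutCollector = createTruncatingCollector(child.stdout);
const stderrCollector = createTruncatingCollector(child.stderr);

child.on("close", (exitCode) => {
  let stdout = stdoutCollector.getString();
  if (stdoutCollector.hit) {
    // Same notice that appeared in the truncated quiet-mode output above.
    stdout += "\n\n[Output truncated: too many lines or bytes]";
  }
  console.log({
    exit_code: exitCode,
    stdout,
    stderr: stderrCollector.getString(),
  });
});
```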