Files
llmx/codex-cli/tests/agent-rate-limit-error.test.ts
Michael Bolin ae5b1b5cb5 add support for -w,--writable-root to add more writable roots for sandbox (#263)
This adds support for a new flag, `-w,--writable-root`, that can be
specified multiple times to _amend_ the list of folders that should be
configured as "writable roots" by the sandbox used in `full-auto` mode.
Values that are passed as relative paths will be resolved to absolute
paths.

Incidentally, this required updating a number of the `agent*.test.ts`
files: it feels like some of the setup logic across those tests could be
consolidated.

In my testing, it seems that this might be slightly out of distribution
for the model, as I had to explicitly tell it to run `apply_patch` and
that it had the permissions to write those files (initially, it just
showed me a diff and told me to apply it myself). Nevertheless, I think
this is a good starting point.
2025-04-17 15:39:26 -07:00

131 lines
4.3 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { describe, it, expect, vi } from "vitest";
// ---------------------------------------------------------------------------
// Mock helpers
// ---------------------------------------------------------------------------
// Shared, mutable handle on the spy that backs the fake OpenAI client's
// `responses.create`. Test cases assign `createSpy` to change how the fake
// client behaves.
type OpenAiState = { createSpy?: ReturnType<typeof vi.fn> };
const openAiState: OpenAiState = {};
/**
 * Replace the "openai" package with a fake so rate-limit errors can be
 * simulated without making real network calls. The AgentLoop only relies on
 * `responses.create`, so the fake exposes nothing beyond that minimal stub.
 */
vi.mock("openai", () => {
  // The real SDK exports this constructor — it is included for typings even
  // though this spec never uses it.
  class APIConnectionTimeoutError extends Error {}

  class FakeOpenAI {
    public responses = {
      // Forwards to whatever spy is currently stored in
      // `openAiState.createSpy`; the lookup happens on every call so each
      // test can install its own spy.
      create: (...callArgs: Array<any>) => {
        return openAiState.createSpy!(...callArgs);
      },
    };
  }

  const mockedModule = {
    __esModule: true,
    default: FakeOpenAI,
    APIConnectionTimeoutError,
  };
  return mockedModule;
});
// Stub out the approvals helpers that the agent indirectly imports so that it
// does not attempt any filesystem access or real approvals logic during the
// test.
vi.mock("../src/approvals.js", () => {
  // Every command is reported as auto-approved and not sandboxed; a fresh
  // decision object is built on each call.
  const canAutoApprove = () => {
    return { type: "auto-approve", runInSandbox: false } as any;
  };

  // Always answers `null`.
  const isSafeCommand = () => null;

  return {
    __esModule: true,
    alwaysApprovedCommands: new Set<string>(),
    canAutoApprove,
    isSafeCommand,
  };
});
// Stub the command formatter: the display form of a command is simply its
// parts joined with single spaces.
vi.mock("../src/format-command.js", () => {
  const formatCommandForDisplay = (parts: Array<string>) => parts.join(" ");

  return {
    __esModule: true,
    formatCommandForDisplay,
  };
});
// Silence agent-loop debug logging so test output stays clean: `log` does
// nothing and logging is always reported as disabled.
vi.mock("../src/utils/agent/log.js", () => {
  const log = () => {};
  const isLoggingEnabled = () => false;

  return {
    __esModule: true,
    log,
    isLoggingEnabled,
  };
});
import { AgentLoop } from "../src/utils/agent/agent-loop.js";
describe("AgentLoop ratelimit handling", () => {
  it("retries up to the maximum and then surfaces a system message", async () => {
    // Fake timers are switched on for this test only; the `finally` clause
    // below restores real timers so other tests are unaffected.
    vi.useFakeTimers();

    try {
      // Dummy rate-limit error shaped to match the implementation's detection
      // logic (`status === 429`).
      const tooManyRequests: any = new Error("Rate limit exceeded");
      tooManyRequests.status = 429;

      // Every call rejects with the rate-limit error, which forces the loop to
      // exhaust all of its retries (5 attempts in total).
      openAiState.createSpy = vi.fn(() => Promise.reject(tooManyRequests));

      // Collects every item the agent emits through `onItem`.
      const emitted: Array<any> = [];

      const loop = new AgentLoop({
        model: "any",
        instructions: "",
        approvalPolicy: { mode: "auto" } as any,
        additionalWritableRoots: [],
        onItem: (item) => emitted.push(item),
        onLoading: () => {},
        getCommandConfirmation: async () => ({ review: "yes" } as any),
        onLastResponseId: () => {},
      });

      const helloInput = [
        {
          type: "message",
          role: "user",
          content: [{ type: "input_text", text: "hello" }],
        },
      ];

      // Kick off the run without awaiting it yet, so the fake timers can be
      // advanced while the run is still in progress.
      const pendingRun = loop.run(helloInput as any);

      // The agent waits 15,000 ms between retries (rate-limit back-off) and
      // does this four times (after attempts 1–4). Fast-forward slightly
      // further to cover any additional small `setTimeout` calls inside the
      // implementation: 4 * 15 s plus a 1 s safety margin.
      await vi.advanceTimersByTimeAsync(4 * 15_000 + 1_000);

      // The run must settle without throwing.
      await expect(pendingRun).resolves.not.toThrow();

      // Flush the 10 ms staging delay used when emitting items.
      await vi.advanceTimersByTimeAsync(20);

      // The OpenAI client should have been called the maximum number of retry
      // attempts (5).
      expect(openAiState.createSpy).toHaveBeenCalledTimes(5);

      // Finally, check that the user is shown a helpful system message.
      const isRateLimitNotice = (item: any) =>
        item.role === "system" &&
        typeof item.content?.[0]?.text === "string" &&
        item.content[0].text.includes("Rate limit reached");
      const notice = emitted.find(isRateLimitNotice);
      expect(notice).toBeTruthy();
    } finally {
      // Put the global timer state back for subsequent tests.
      vi.useRealTimers();
    }
  });
});