Files
llmx/codex-cli/tests/agent-project-doc.test.ts
Michael Bolin ae5b1b5cb5 add support for -w,--writable-root to add more writable roots for sandbox (#263)
This adds support for a new flag, `-w,--writable-root`, that can be
specified multiple times to _amend_ the list of folders that should be
configured as "writable roots" by the sandbox used in `full-auto` mode.
Values that are passed as relative paths will be resolved to absolute
paths.

Incidentally, this required updating a number of the `agent*.test.ts`
files: it feels like some of the setup logic across those tests could be
consolidated.

In my testing, it seems that this might be slightly out of distribution
for the model, as I had to explicitly tell it to run `apply_patch` and
that it had the permissions to write those files (initially, it just
showed me a diff and told me to apply it myself). Nevertheless, I think
this is a good starting point.
2025-04-17 15:39:26 -07:00

143 lines
4.4 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { mkdtempSync, rmSync, writeFileSync, mkdirSync } from "fs";
import { tmpdir } from "os";
import { join } from "path";
import { describe, expect, it, vi, beforeEach, afterEach } from "vitest";
// ---------------------------------------------------------------------------
// Test helpers & mocks
// ---------------------------------------------------------------------------
// Minimal stand-in for the streamed response object produced by the mocked
// OpenAI SDK. AgentLoop only requires an async-iterable that eventually emits
// a `response.completed` event so the turn can terminate.
class FakeStream {
  public controller = { abort: vi.fn() };

  async *[Symbol.asyncIterator]() {
    const completed = {
      type: "response.completed",
      response: {
        id: "r1",
        status: "completed",
        output: [],
      },
    } as any;
    yield completed;
  }
}
// Capture the parameters that AgentLoop sends to `openai.responses.create()` so
// we can assert on the `instructions` value.
// NOTE: mutable module-level state shared with the mock factory below; it is
// reset to null in beforeEach so each test observes only its own call.
let lastCreateParams: any = null;
// The vi.mock call is hoisted by vitest above the imports, but the factory
// itself runs lazily on first import of "openai", so referencing FakeStream
// and lastCreateParams from inside it is safe.
vi.mock("openai", () => {
// Stand-in for the real OpenAI client: records the request params and hands
// back a FakeStream that immediately completes the turn.
class FakeOpenAI {
public responses = {
create: async (params: any) => {
lastCreateParams = params;
return new FakeStream();
},
};
}
// Empty subclass is enough here — presumably AgentLoop only checks the error
// type (instanceof) during retry handling; confirm against agent-loop.ts.
class APIConnectionTimeoutError extends Error {}
return {
__esModule: true,
default: FakeOpenAI,
APIConnectionTimeoutError,
};
});
// AgentLoop consults these helpers to decide whether a command may run without
// explicit user approval. None of that logic matters for this test, so the
// module is replaced with minimal stand-ins.
vi.mock("../src/approvals.js", () => {
  const approvalsStub = {
    __esModule: true,
    alwaysApprovedCommands: new Set<string>(),
    // A fresh object per call, matching the shape AgentLoop expects.
    canAutoApprove: () =>
      ({ type: "auto-approve", runInSandbox: false } as any),
    isSafeCommand: () => null,
  };
  return approvalsStub;
});
// Keep command rendering trivial: just join the argv with spaces.
vi.mock("../src/format-command.js", () => {
  const formatCommandForDisplay = (cmd: Array<string>) => cmd.join(" ");
  return { __esModule: true, formatCommandForDisplay };
});
// Silence the file-based logger so tests have no filesystem side effects and
// the output stays clean.
vi.mock("../src/utils/agent/log.js", () => {
  return {
    __esModule: true,
    log: () => {},
    isLoggingEnabled: () => false,
  };
});
// ---------------------------------------------------------------------------
// After mocks are in place we can import the modules under test.
// ---------------------------------------------------------------------------
import { AgentLoop } from "../src/utils/agent/agent-loop.js";
import { loadConfig } from "../src/utils/config.js";
// ---------------------------------------------------------------------------
// Absolute path of the throw-away project directory used by each test.
let projectDir: string;

beforeEach(() => {
  // Fresh temp directory acting as an isolated git repository.
  projectDir = mkdtempSync(join(tmpdir(), "codex-proj-"));
  // An empty `.git` folder marks the directory as the project root.
  mkdirSync(join(projectDir, ".git"));
  // Small project doc that we expect to be folded into the prompt.
  writeFileSync(join(projectDir, "codex.md"), "# Test Project\nHello docs!\n");
  // Forget any SDK params captured by a previous test.
  lastCreateParams = null;
});

afterEach(() => {
  // Best-effort cleanup of the temp project directory.
  rmSync(projectDir, { recursive: true, force: true });
});
describe("AgentLoop", () => {
  it("passes codex.md contents through the instructions parameter", async () => {
    const config = loadConfig(undefined, undefined, { cwd: projectDir });

    // Guard against a broken fixture: loadConfig itself must have picked up
    // the project doc before we exercise the full pipeline. This is not the
    // main assertion of the test.
    expect(config.instructions).toContain("Hello docs!");

    const loop = new AgentLoop({
      additionalWritableRoots: [],
      model: "o3", // arbitrary
      instructions: config.instructions,
      config,
      approvalPolicy: { mode: "suggest" } as any,
      onItem: () => {},
      onLoading: () => {},
      getCommandConfirmation: async () => ({ review: "yes" } as any),
      onLastResponseId: () => {},
    });

    // Run a single user turn; the mocked OpenAI client resolves immediately.
    await loop.run([
      {
        type: "message",
        role: "user",
        content: [{ type: "input_text", text: "ping" }],
      },
    ]);

    // The instructions that reached the (mocked) SDK must still contain the
    // project doc, validating the full path:
    // loadConfig → AgentLoop → addInstructionPrefix → OpenAI SDK.
    expect(lastCreateParams).not.toBeNull();
    expect(lastCreateParams.instructions).toContain("Hello docs!");
  });
});