Initial commit

Signed-off-by: Ilan Bigio <ilan@openai.com>
This commit is contained in:
Ilan Bigio
2025-04-16 12:56:08 -04:00
commit 59a180ddec
163 changed files with 30587 additions and 0 deletions

View File

@@ -0,0 +1 @@
hello a

View File

@@ -0,0 +1 @@
hello b

View File

@@ -0,0 +1,127 @@
import { describe, it, expect, vi } from "vitest";
/**
 * Fake response stream whose first event is held back for 30 ms, which gives
 * the test a window to cancel before the `function_call` item is delivered.
 */
class SlowFunctionCallStream {
  public controller = { abort: vi.fn() };

  async *[Symbol.asyncIterator]() {
    // Builds a fresh function_call item each time it is invoked, so the two
    // events below never share an object.
    const shellCall = () => ({
      type: "function_call",
      id: "slow_call",
      name: "shell",
      arguments: JSON.stringify({ cmd: ["echo", "hi"] }),
    });

    // Delay before anything is yielded.
    await new Promise((resolve) => setTimeout(resolve, 30));

    yield { type: "response.output_item.done", item: shellCall() } as any;

    yield {
      type: "response.completed",
      response: {
        id: "resp_slow",
        status: "completed",
        output: [shellCall()],
      },
    } as any;
  }
}
// Replace the OpenAI SDK with a fake client. The first `responses.create()`
// call returns the slow function_call stream; every later call returns a
// stream without events. Each request body is recorded and exposed through
// `_test.getBodies()`.
vi.mock("openai", () => {
  const recordedBodies: Array<any> = [];
  let createCalls = 0;

  // Stream that finishes without yielding anything.
  class EmptyStream {
    public controller = { abort: vi.fn() };
    async *[Symbol.asyncIterator]() {}
  }

  class FakeOpenAI {
    public responses = {
      create: async (body: any) => {
        recordedBodies.push(body);
        createCalls += 1;
        return createCalls === 1
          ? new SlowFunctionCallStream()
          : new EmptyStream();
      },
    };
  }

  class APIConnectionTimeoutError extends Error {}

  const mockedModule = {
    __esModule: true,
    default: FakeOpenAI,
    APIConnectionTimeoutError,
    _test: { getBodies: () => recordedBodies },
  };
  return mockedModule;
});
// Approvals stub: every command is reported as auto-approved, not sandboxed.
vi.mock("@lib/approvals.js", () => {
  return {
    __esModule: true,
    alwaysApprovedCommands: new Set<string>(),
    canAutoApprove: () => {
      return { type: "auto-approve", runInSandbox: false } as any;
    },
  };
});

// Command formatter stub: joins the argv entries with single spaces.
vi.mock("@lib/format-command.js", () => {
  return {
    __esModule: true,
    formatCommandForDisplay: (argv: Array<string>) => argv.join(" "),
  };
});

// Logger stub: logging is reported as disabled and `log` does nothing.
vi.mock("../src/utils/agent/log.js", () => {
  return {
    __esModule: true,
    log: () => {},
    isLoggingEnabled: () => false,
  };
});
import { AgentLoop } from "../src/utils/agent/agent-loop.js";
describe("cancel before first function_call", () => {
  it("clears previous_response_id if no call ids captured", async () => {
    // Builds a single-message user turn in the shape handed to `run()`.
    const userTurn = (text: string) =>
      [
        {
          type: "message",
          role: "user",
          content: [{ type: "input_text", text }],
        },
      ] as any;

    const openaiMock = (await import("openai")) as any;

    const loop = new AgentLoop({
      model: "any",
      instructions: "",
      approvalPolicy: { mode: "auto" } as any,
      onItem: () => {},
      onLoading: () => {},
      getCommandConfirmation: async () => ({ review: "yes" } as any),
      onLastResponseId: () => {},
      config: { model: "any", instructions: "" },
    });

    // Kick off the first run without awaiting it so it can be cancelled.
    loop.run(userTurn("do"));

    // Cancel after 5 ms; the fake stream only yields after 30 ms.
    await new Promise((resolve) => setTimeout(resolve, 5));
    loop.cancel();

    // Second run, awaited to completion.
    await loop.run(userTurn("new"));

    // The most recent request body must not carry a previous_response_id.
    const requests = openaiMock._test.getBodies();
    const finalRequest = requests[requests.length - 1];
    expect(finalRequest.previous_response_id).toBeUndefined();
  });
});

View File

@@ -0,0 +1,150 @@
import { describe, it, expect, vi } from "vitest";
/**
 * Fake response stream that delivers a `function_call` item ("call123") and
 * then a `response.completed` event with id "resp_func_call".
 */
class StreamWithFunctionCall {
  public controller = { abort: vi.fn() };

  async *[Symbol.asyncIterator]() {
    // Produces a new function_call item on every invocation.
    const shellCall = () => ({
      type: "function_call",
      id: "call123",
      name: "shell",
      arguments: JSON.stringify({ cmd: ["echo", "hi"] }),
    });

    // Step 1: the function call itself.
    yield { type: "response.output_item.done", item: shellCall() } as any;

    // Step 2: end of the turn, repeating the same call in `output`.
    yield {
      type: "response.completed",
      response: {
        id: "resp_func_call", // lastResponseId that would normally be stored
        status: "completed",
        output: [shellCall()],
      },
    } as any;
  }
}
// Fake OpenAI SDK: the first `responses.create()` call streams a
// function_call, all later calls return a stream without events. Request
// bodies are collected and exposed through `_test.getBodies()`.
vi.mock("openai", () => {
  const seenBodies: Array<any> = [];
  let createCalls = 0;

  // Stream that ends immediately.
  class EmptyStream {
    public controller = { abort: vi.fn() };
    async *[Symbol.asyncIterator]() {
      /* no events */
    }
  }

  class FakeOpenAI {
    public responses = {
      create: async (body: any) => {
        seenBodies.push(body);
        createCalls += 1;
        const isFirstCall = createCalls === 1;
        return isFirstCall ? new StreamWithFunctionCall() : new EmptyStream();
      },
    };
  }

  class APIConnectionTimeoutError extends Error {}

  const testHooks = {
    getBodies: () => seenBodies,
  };

  return {
    __esModule: true,
    default: FakeOpenAI,
    APIConnectionTimeoutError,
    _test: testHooks,
  };
});
// Helper modules that do not matter for this test are replaced by stubs.

// Approvals: always answer "auto-approve" without sandboxing.
vi.mock("@lib/approvals.js", () => {
  return {
    __esModule: true,
    alwaysApprovedCommands: new Set<string>(),
    canAutoApprove: () => {
      return { type: "auto-approve", runInSandbox: false } as any;
    },
  };
});

// Command formatting: space-join the argv entries.
vi.mock("@lib/format-command.js", () => {
  return {
    __esModule: true,
    formatCommandForDisplay: (argv: Array<string>) => argv.join(" "),
  };
});

// Logger: disabled, `log` is a no-op.
vi.mock("../src/utils/agent/log.js", () => {
  return {
    __esModule: true,
    log: () => {},
    isLoggingEnabled: () => false,
  };
});
// Now import the agent.
import { AgentLoop } from "../src/utils/agent/agent-loop.js";
describe("cancel clears previous_response_id", () => {
  // NOTE(review): the title talks about `previous_response_id`, but the
  // assertions below check that a `function_call_output` for the interrupted
  // call is sent — confirm the title still describes the intent.
  it("second run after cancel should NOT include previous_response_id", async () => {
    const { _test } = (await import("openai")) as any;
    const agent = new AgentLoop({
      model: "any",
      instructions: "",
      approvalPolicy: { mode: "auto" } as any,
      onItem: () => {},
      onLoading: () => {},
      getCommandConfirmation: async () => ({ review: "yes" } as any),
      onLastResponseId: () => {},
      config: { model: "any", instructions: "" },
    });

    // First run that triggers a function_call. It is deliberately not awaited:
    // we cancel *before* the turn completes so the tool result is never
    // returned.
    agent.run([
      {
        type: "message",
        role: "user",
        content: [{ type: "input_text", text: "do something" }],
      },
    ] as any);

    // Give it a moment to receive the function_call.
    await new Promise((r) => setTimeout(r, 40));

    // Cancel (simulate ESC ESC).
    agent.cancel();

    // Second user input.
    await agent.run([
      {
        type: "message",
        role: "user",
        content: [{ type: "input_text", text: "new command" }],
      },
    ] as any);

    const bodies = _test.getBodies();
    expect(bodies.length).toBeGreaterThanOrEqual(2);

    // At least one recorded request (any of them, not only the last) must
    // carry a `function_call_output` that answers the call "call123".
    const found = bodies.some(
      (b: any) =>
        Array.isArray(b.input) &&
        b.input.some(
          (i: any) =>
            i.type === "function_call_output" && i.call_id === "call123",
        ),
    );
    expect(found).toBe(true);
  });
});

View File

@@ -0,0 +1,138 @@
import { describe, it, expect, vi } from "vitest";
// This test reproduces the real-world issue where the user cancels the current
// task (Esc Esc) but the model's response has already started to stream — the
// partial answer still shows up in the UI.
// --- Mocks -----------------------------------------------------------------
/**
 * Fake response stream that yields an assistant message ("hello") right away,
 * followed by the matching `response.completed` event.
 *
 * Nothing is awaited before the first yield, so a cancellation issued a few
 * milliseconds after the run starts can arrive *after* the first item has
 * already been emitted. That is the race this test file targets.
 */
class FakeStream {
  public controller = { abort: vi.fn() };

  async *[Symbol.asyncIterator]() {
    // Fresh assistant message object per use.
    const helloMessage = () => ({
      type: "message",
      role: "assistant",
      id: "m1",
      content: [{ type: "text", text: "hello" }],
    });

    yield { type: "response.output_item.done", item: helloMessage() } as any;

    yield {
      type: "response.completed",
      response: {
        id: "resp1",
        status: "completed",
        output: [helloMessage()],
      },
    } as any;
  }
}
// Fake OpenAI SDK. Only the *first* `responses.create()` call yields "hello",
// so an assistant answer seen later clearly comes from the cancelled run.
vi.mock("openai", () => {
  let createCalls = 0;

  // Stream without any events, used for every call after the first.
  class EmptyStream {
    public controller = { abort: vi.fn() };
    async *[Symbol.asyncIterator]() {
      // empty stream
    }
  }

  class FakeOpenAI {
    public responses = {
      create: async () => {
        createCalls += 1;
        if (createCalls === 1) {
          return new FakeStream();
        }
        return new EmptyStream();
      },
    };
  }

  class APIConnectionTimeoutError extends Error {}

  return {
    __esModule: true,
    default: FakeOpenAI,
    APIConnectionTimeoutError,
  };
});
// Stubs for external helpers that are only referenced indirectly.

// Approvals: `isSafeCommand` always answers null.
vi.mock("@lib/approvals.js", () => {
  return {
    __esModule: true,
    isSafeCommand: () => null,
  };
});

// Command formatting: space-join the argv entries.
vi.mock("@lib/format-command.js", () => {
  return {
    __esModule: true,
    formatCommandForDisplay: (argv: Array<string>) => argv.join(" "),
  };
});
// Stub the logger to avoid filesystem side effects during tests.
import { AgentLoop } from "../src/utils/agent/agent-loop.js";
// Logger stub: `log` does nothing and logging is reported as disabled.
vi.mock("../src/utils/agent/log.js", () => {
  return {
    __esModule: true,
    log: () => {},
    isLoggingEnabled: () => false,
  };
});
describe("Agent cancellation race", () => {
  // This test is meant to expose the cancellation race in `AgentLoop`: it is
  // expected to stay red until the race is fixed.
  it("still emits the model answer even though cancel() was called", async () => {
    const delivered: Array<any> = [];

    const loop = new AgentLoop({
      model: "any",
      instructions: "",
      config: { model: "any", instructions: "" },
      approvalPolicy: { mode: "auto" } as any,
      onItem: (entry) => delivered.push(entry),
      onLoading: () => {},
      getCommandConfirmation: async () => ({ review: "yes" } as any),
      onLastResponseId: () => {},
    });

    const firstTurn = [
      {
        type: "message",
        role: "user",
        content: [{ type: "input_text", text: "say hello" }],
      },
    ];
    const followUpTurn = [
      {
        type: "message",
        role: "user",
        content: [{ type: "input_text", text: "noop" }],
      },
    ];

    // Start the first run (not awaited) and cancel once the stream has begun.
    loop.run(firstTurn as any);
    await new Promise((resolve) => setTimeout(resolve, 5));
    loop.cancel();

    // Immediately issue another command, mimicking the UI letting the user
    // type something else; it is not awaited either.
    loop.run(followUpTurn as any);

    // Give everything time to flush.
    await new Promise((resolve) => setTimeout(resolve, 40));

    // The bug manifests as an assistant message from the cancelled run still
    // being delivered, so none must be present.
    const leakedAnswer = delivered.find((entry) => entry.role === "assistant");
    expect(leakedAnswer).toBeUndefined();
  });
});

View File

@@ -0,0 +1,169 @@
import { describe, it, expect, vi } from "vitest";
/**
 * Fake stream returned by the mocked OpenAI SDK so the test controls the
 * streaming events: a `function_call` item ("call1") is yielded immediately,
 * then a `response.completed` event repeating that call.
 */
class FakeStream {
  public controller = { abort: vi.fn() };

  async *[Symbol.asyncIterator]() {
    // Produces a new function_call item per invocation.
    const nodeCall = () => ({
      type: "function_call",
      id: "call1",
      name: "shell",
      arguments: JSON.stringify({ cmd: ["node", "-e", "console.log('hi')"] }),
    });

    // The function_call item comes first, without any delay.
    yield { type: "response.output_item.done", item: nodeCall() } as any;

    // Turn completion carrying the same function_call.
    yield {
      type: "response.completed",
      response: {
        id: "resp1",
        status: "completed",
        output: [nodeCall()],
      },
    } as any;
  }
}
// Fake OpenAI SDK: every `responses.create()` call returns a new FakeStream.
vi.mock("openai", () => {
  class APIConnectionTimeoutError extends Error {}

  class FakeOpenAI {
    public responses = {
      create: async () => {
        return new FakeStream();
      },
    };
  }

  return {
    __esModule: true,
    default: FakeOpenAI,
    APIConnectionTimeoutError,
  };
});
// Stub the approvals and command-formatting helpers.
vi.mock("@lib/approvals.js", () => ({
  __esModule: true,
  alwaysApprovedCommands: new Set<string>(),
  // Every command is auto-approved and not sandboxed.
  canAutoApprove: () => ({ type: "auto-approve", runInSandbox: false } as any),
  isSafeCommand: () => null,
}));

vi.mock("@lib/format-command.js", () => ({
  __esModule: true,
  // Space-join the argv entries.
  formatCommandForDisplay: (argv: Array<string>) => argv.join(" "),
}));

// Stub the logger to avoid filesystem side effects during tests.
vi.mock("../src/utils/agent/log.js", () => {
  return {
    __esModule: true,
    log: () => {},
    isLoggingEnabled: () => false,
  };
});
// After mocking dependencies we can import the modules under test.
import { AgentLoop } from "../src/utils/agent/agent-loop.js";
import * as handleExec from "../src/utils/agent/handle-exec-command.js";
describe("Agent cancellation", () => {
  // The user turn sent in both tests.
  const sayHiTurn = () => [
    {
      type: "message",
      role: "user",
      content: [{ type: "input_text", text: "say hi" }],
    },
  ];

  // True when any collected item is a `function_call_output`.
  const sawToolOutput = (collected: Array<any>) =>
    collected.some((entry) => entry.type === "function_call_output");

  it("does not emit function_call_output after cancel", async () => {
    // Replace handleExecCommand with a slow command (50 ms) that would
    // produce "hello" if it were allowed to finish.
    vi.spyOn(handleExec, "handleExecCommand").mockImplementation(async () => {
      await new Promise((resolve) => setTimeout(resolve, 50));
      return { outputText: "hello", metadata: {} } as any;
    });

    const collected: Array<any> = [];
    const loop = new AgentLoop({
      model: "any",
      instructions: "",
      config: { model: "any", instructions: "" },
      approvalPolicy: { mode: "auto" } as any,
      onItem: (entry) => {
        collected.push(entry);
      },
      onLoading: () => {},
      getCommandConfirmation: async () => ({ review: "yes" } as any),
      onLastResponseId: () => {},
    });

    // Start the loop without awaiting it; it is cancelled while running.
    loop.run(sayHiTurn() as any);

    // Let the agent begin processing, then cancel the task.
    await new Promise((resolve) => setTimeout(resolve, 10));
    loop.cancel();

    // Leave time for any pending promises to settle.
    await new Promise((resolve) => setTimeout(resolve, 100));

    // No function_call_output may have been emitted.
    expect(sawToolOutput(collected)).toBe(false);
  });

  it("still suppresses output when cancellation happens after a fast exec", async () => {
    vi.restoreAllMocks();

    // Exec mock that resolves immediately.
    vi.spyOn(handleExec, "handleExecCommand").mockResolvedValue({
      outputText: "hello-fast",
      metadata: {},
    } as any);

    const collected: Array<any> = [];
    const loop = new AgentLoop({
      model: "any",
      instructions: "",
      config: { model: "any", instructions: "" },
      approvalPolicy: { mode: "auto" } as any,
      onItem: (entry) => collected.push(entry),
      onLoading: () => {},
      getCommandConfirmation: async () => ({ review: "yes" } as any),
      onLastResponseId: () => {},
    });

    loop.run(sayHiTurn() as any);

    // Wait 20 ms so the exec has certainly finished, then cancel.
    await new Promise((resolve) => setTimeout(resolve, 20));
    loop.cancel();

    await new Promise((resolve) => setTimeout(resolve, 50));

    expect(sawToolOutput(collected)).toBe(false);
  });
});

View File

@@ -0,0 +1,149 @@
import { describe, it, expect, vi } from "vitest";
// ---------------------------------------------------------------------------
// This regression test ensures that the AgentLoop correctly copies the ID of a
// function tool-call (be it `call_id` from the /responses endpoint *or* `id`
// from the /chat endpoint) into the subsequent `function_call_output` item. A
// missing or mismatched ID leads to the dreaded
// 400 | No tool output found for function call …
// error from the OpenAI API.
// ---------------------------------------------------------------------------
/**
 * Fake OpenAI stream that immediately yields a *chat-style* function_call
 * item (an `id` plus a nested `function` descriptor), followed by a
 * `response.completed` event repeating the same call.
 */
class FakeStream {
  public controller = { abort: vi.fn() };

  async *[Symbol.asyncIterator]() {
    // Chat-endpoint shaped call; a new object is built for each event.
    const chatStyleCall = () => ({
      type: "function_call",
      id: "call_test_123",
      function: {
        name: "shell",
        arguments: JSON.stringify({ cmd: ["echo", "hi"] }),
      },
    });

    yield { type: "response.output_item.done", item: chatStyleCall() } as any;

    yield {
      type: "response.completed",
      response: {
        id: "resp1",
        status: "completed",
        output: [chatStyleCall()],
      },
    } as any;
  }
}
// Intercept the OpenAI SDK so the body of the second `responses.create()`
// call can be inspected — that is the request expected to contain our
// `function_call_output` item. A third call makes the fake throw.
vi.mock("openai", () => {
  let callIndex = 0;
  let secondRequestBody: any;

  class FakeOpenAI {
    public responses = {
      create: async (body: any) => {
        callIndex += 1;
        switch (callIndex) {
          case 1:
            return new FakeStream();
          case 2:
            secondRequestBody = body;
            // Answer the second request with a stream that has no items.
            return new (class {
              public controller = { abort: vi.fn() };
              async *[Symbol.asyncIterator]() {
                /* no items */
              }
            })();
          default:
            throw new Error("Unexpected additional invocation in test");
        }
      },
    };
  }

  class APIConnectionTimeoutError extends Error {}

  // Extra export that lets the test read the captured body.
  const testHooks = {
    getCapturedSecondBody: () => secondRequestBody,
  };

  return {
    __esModule: true,
    default: FakeOpenAI,
    APIConnectionTimeoutError,
    _test: testHooks,
  };
});
// Stub approvals and command formatting — not relevant for this test.
vi.mock("@lib/approvals.js", () => {
  return {
    __esModule: true,
    alwaysApprovedCommands: new Set<string>(),
    canAutoApprove: () => {
      return { type: "auto-approve", runInSandbox: false } as any;
    },
    isSafeCommand: () => null,
  };
});

vi.mock("@lib/format-command.js", () => {
  return {
    __esModule: true,
    formatCommandForDisplay: (argv: Array<string>) => argv.join(" "),
  };
});

// Stub the logger to keep the test output clean.
vi.mock("../src/utils/agent/log.js", () => {
  return {
    __esModule: true,
    log: () => {},
    isLoggingEnabled: () => false,
  };
});
// Finally, import the module under test.
import { AgentLoop } from "../src/utils/agent/agent-loop.js";
describe("function_call_output includes original call ID", () => {
  it("copies id → call_id so the API accepts the tool result", async () => {
    const openaiMock = (await import("openai")) as any;

    const loop = new AgentLoop({
      model: "any",
      instructions: "",
      approvalPolicy: { mode: "auto" } as any,
      onItem: () => {},
      onLoading: () => {},
      getCommandConfirmation: async () => ({ review: "yes" } as any),
      onLastResponseId: () => {},
    });

    const runTurn = [
      {
        type: "message",
        role: "user",
        content: [{ type: "input_text", text: "run" }],
      },
    ];
    await loop.run(runTurn as any);

    // Allow a short moment for the second round-trip to finish.
    await new Promise((resolve) => setTimeout(resolve, 20));

    const secondBody = openaiMock._test.getCapturedSecondBody();
    expect(secondBody).toBeTruthy();

    // The second request must answer the call using the original id.
    const isToolOutput = (entry: any) => entry.type === "function_call_output";
    const toolOutput = secondBody.input?.find(isToolOutput);
    expect(toolOutput).toBeTruthy();
    expect(toolOutput.call_id).toBe("call_test_123");
  });
});

View File

@@ -0,0 +1,132 @@
import { describe, it, expect, vi } from "vitest";
// ---------------------------------------------------------------------------
// Utility helpers & OpenAI mock (lightweight — focuses on network failures)
// ---------------------------------------------------------------------------
// Shared holder: each test installs the spy that backs `responses.create()`.
const openAiState: { createSpy?: ReturnType<typeof vi.fn> } = {};

// Fake OpenAI SDK whose `responses.create()` forwards all arguments to the
// spy currently stored in `openAiState`.
vi.mock("openai", () => {
  class APIConnectionTimeoutError extends Error {}

  class FakeOpenAI {
    public responses = {
      create: (...callArgs: Array<any>) => {
        return openAiState.createSpy!(...callArgs);
      },
    };
  }

  const mockedModule = {
    __esModule: true,
    default: FakeOpenAI,
    APIConnectionTimeoutError,
  };
  return mockedModule;
});
// Approval and formatting helpers have nothing to do with network handling,
// so they are replaced by stubs.
vi.mock("@lib/approvals.js", () => {
  return {
    __esModule: true,
    alwaysApprovedCommands: new Set<string>(),
    canAutoApprove: () => {
      return { type: "auto-approve", runInSandbox: false } as any;
    },
    isSafeCommand: () => null,
  };
});

vi.mock("@lib/format-command.js", () => {
  return {
    __esModule: true,
    formatCommandForDisplay: (argv: Array<string>) => argv.join(" "),
  };
});

// Silence debug logs so the test output stays clean.
vi.mock("../src/utils/agent/log.js", () => {
  return {
    __esModule: true,
    log: () => {},
    isLoggingEnabled: () => false,
  };
});
import { AgentLoop } from "../src/utils/agent/agent-loop.js";
describe("AgentLoop generic network/server errors", () => {
  // Creates an AgentLoop that pushes every emitted item into `sink`.
  const startAgent = (sink: Array<any>) =>
    new AgentLoop({
      model: "any",
      instructions: "",
      approvalPolicy: { mode: "auto" } as any,
      onItem: (entry) => sink.push(entry),
      onLoading: () => {},
      getCommandConfirmation: async () => ({ review: "yes" } as any),
      onLastResponseId: () => {},
    });

  // The user turn sent in both tests.
  const pingTurn = () => [
    {
      type: "message",
      role: "user",
      content: [{ type: "input_text", text: "ping" }],
    },
  ];

  // First system item whose leading text part contains `needle`.
  const findSystemMessage = (sink: Array<any>, needle: string) =>
    sink.find(
      (entry) =>
        entry.role === "system" &&
        typeof entry.content?.[0]?.text === "string" &&
        entry.content[0].text.includes(needle),
    );

  it("emits friendly system message instead of throwing on ECONNRESET", async () => {
    const resetError = Object.assign(new Error("socket hang up"), {
      code: "ECONNRESET",
    });
    openAiState.createSpy = vi.fn(async () => {
      throw resetError;
    });

    const collected: Array<any> = [];
    const loop = startAgent(collected);

    await expect(loop.run(pingTurn() as any)).resolves.not.toThrow();

    // Give flush timers a chance.
    await new Promise((resolve) => setTimeout(resolve, 20));

    expect(findSystemMessage(collected, "Network error")).toBeTruthy();
  });

  it("emits user friendly message on HTTP 500 from OpenAI", async () => {
    const serverError = Object.assign(new Error("Internal Server Error"), {
      status: 500,
    });
    openAiState.createSpy = vi.fn(async () => {
      throw serverError;
    });

    const collected: Array<any> = [];
    const loop = startAgent(collected);

    await expect(loop.run(pingTurn() as any)).resolves.not.toThrow();
    await new Promise((resolve) => setTimeout(resolve, 20));

    expect(findSystemMessage(collected, "error")).toBeTruthy();
  });
});

View File

@@ -0,0 +1,88 @@
import { describe, it, expect, vi } from "vitest";
// ---------------------------------------------------------------------------
// Mock helpers
// ---------------------------------------------------------------------------
// Shared holder: the test installs the spy that backs `responses.create()`.
const openAiState: { createSpy?: ReturnType<typeof vi.fn> } = {};

// Fake OpenAI SDK; `responses.create()` delegates to `openAiState.createSpy`.
vi.mock("openai", () => {
  class APIConnectionTimeoutError extends Error {}

  class FakeOpenAI {
    public responses = {
      create: (...callArgs: Array<any>) => {
        return openAiState.createSpy!(...callArgs);
      },
    };
  }

  const mockedModule = {
    __esModule: true,
    default: FakeOpenAI,
    APIConnectionTimeoutError,
  };
  return mockedModule;
});
// Approvals stub: auto-approve everything, never sandbox.
vi.mock("@lib/approvals.js", () => {
  return {
    __esModule: true,
    alwaysApprovedCommands: new Set<string>(),
    canAutoApprove: () => {
      return { type: "auto-approve", runInSandbox: false } as any;
    },
    isSafeCommand: () => null,
  };
});

// Command formatter stub: space-join the argv entries.
vi.mock("@lib/format-command.js", () => {
  return {
    __esModule: true,
    formatCommandForDisplay: (argv: Array<string>) => argv.join(" "),
  };
});

// Logger stub: disabled, `log` is a no-op.
vi.mock("../src/utils/agent/log.js", () => {
  return {
    __esModule: true,
    log: () => {},
    isLoggingEnabled: () => false,
  };
});
import { AgentLoop } from "../src/utils/agent/agent-loop.js";
describe("AgentLoop invalid request / 4xx errors", () => {
  it("shows system message and resolves on invalid_request_error", async () => {
    // Make every `responses.create()` call fail with a 400-style error.
    const rejection = Object.assign(
      new Error("Invalid request: model not found"),
      { code: "invalid_request_error", status: 400 },
    );
    openAiState.createSpy = vi.fn(async () => {
      throw rejection;
    });

    const collected: Array<any> = [];
    const loop = new AgentLoop({
      model: "any",
      instructions: "",
      approvalPolicy: { mode: "auto" } as any,
      onItem: (entry) => collected.push(entry),
      onLoading: () => {},
      getCommandConfirmation: async () => ({ review: "yes" } as any),
      onLastResponseId: () => {},
    });

    const helloTurn = [
      {
        type: "message",
        role: "user",
        content: [{ type: "input_text", text: "hello" }],
      },
    ];

    // The run must resolve rather than reject.
    await expect(loop.run(helloTurn as any)).resolves.not.toThrow();
    await new Promise((resolve) => setTimeout(resolve, 20));

    // A system item mentioning the rejection must have been emitted.
    const isRejectionNotice = (entry: any) =>
      entry.role === "system" &&
      typeof entry.content?.[0]?.text === "string" &&
      entry.content[0].text.includes("OpenAI rejected");
    expect(collected.find(isRejectionNotice)).toBeTruthy();
  });
});

View File

@@ -0,0 +1,92 @@
import { describe, it, expect, vi } from "vitest";
// ---------------------------------------------------------------------------
// Mock helpers
// ---------------------------------------------------------------------------
// Shared holder: the test installs the spy that backs `responses.create()`.
const openAiState: { createSpy?: ReturnType<typeof vi.fn> } = {};

// Fake OpenAI SDK; `responses.create()` delegates to `openAiState.createSpy`.
vi.mock("openai", () => {
  class APIConnectionTimeoutError extends Error {}

  class FakeOpenAI {
    public responses = {
      create: (...callArgs: Array<any>) => {
        return openAiState.createSpy!(...callArgs);
      },
    };
  }

  const mockedModule = {
    __esModule: true,
    default: FakeOpenAI,
    APIConnectionTimeoutError,
  };
  return mockedModule;
});
// Approvals stub: auto-approve everything, never sandbox.
vi.mock("@lib/approvals.js", () => {
  return {
    __esModule: true,
    alwaysApprovedCommands: new Set<string>(),
    canAutoApprove: () => {
      return { type: "auto-approve", runInSandbox: false } as any;
    },
    isSafeCommand: () => null,
  };
});

// Command formatter stub: space-join the argv entries.
vi.mock("@lib/format-command.js", () => {
  return {
    __esModule: true,
    formatCommandForDisplay: (argv: Array<string>) => argv.join(" "),
  };
});

// Logger stub: disabled, `log` is a no-op.
vi.mock("../src/utils/agent/log.js", () => {
  return {
    __esModule: true,
    log: () => {},
    isLoggingEnabled: () => false,
  };
});
import { AgentLoop } from "../src/utils/agent/agent-loop.js";
describe("AgentLoop max_tokens too large error", () => {
  it("shows contextlength system message and resolves", async () => {
    // Make every `responses.create()` call fail with a max_tokens 400 error.
    const tooLarge = Object.assign(
      new Error(
        "max_tokens is too large: 167888. This model supports at most 100000 completion tokens, whereas you provided 167888.",
      ),
      { type: "invalid_request_error", param: "max_tokens", status: 400 },
    );
    openAiState.createSpy = vi.fn(async () => {
      throw tooLarge;
    });

    const collected: Array<any> = [];
    const loop = new AgentLoop({
      model: "any",
      instructions: "",
      approvalPolicy: { mode: "auto" } as any,
      onItem: (entry) => collected.push(entry),
      onLoading: () => {},
      getCommandConfirmation: async () => ({ review: "yes" } as any),
      onLastResponseId: () => {},
    });

    const helloTurn = [
      {
        type: "message",
        role: "user",
        content: [{ type: "input_text", text: "hello" }],
      },
    ];

    // The run must resolve rather than reject.
    await expect(loop.run(helloTurn as any)).resolves.not.toThrow();

    // Allow asynchronous onItem calls to flush.
    await new Promise((resolve) => setTimeout(resolve, 20));

    // A system item about the context length must have been emitted.
    const isContextNotice = (entry: any) =>
      entry.role === "system" &&
      typeof entry.content?.[0]?.text === "string" &&
      entry.content[0].text.includes("exceeds the maximum context length");
    expect(collected.find(isContextNotice)).toBeTruthy();
  });
});

View File

@@ -0,0 +1,179 @@
import { describe, it, expect, vi } from "vitest";
// ---------------------------------------------------------------------------
// Utility: fake OpenAI SDK with programmable behaviour per test case.
// ---------------------------------------------------------------------------
/**
 * Builds a fake stream that yields the given events in order.
 *
 * @param events Events to yield, one after another.
 * @param opts   When `throwAfter` is set, that error is thrown once all
 *               events have been yielded.
 * @returns An async-iterable object that also carries a `controller.abort` spy.
 */
function createStream(events: Array<any>, opts: { throwAfter?: Error } = {}) {
  class PredeterminedStream {
    public controller = { abort: vi.fn() };

    async *[Symbol.asyncIterator]() {
      for (let index = 0; index < events.length; index += 1) {
        yield events[index];
      }
      const failure = opts.throwAfter;
      if (failure) {
        throw failure;
      }
    }
  }
  return new PredeterminedStream();
}
// Holder through which each test supplies the spy behind `responses.create()`.
const openAiState: {
  createSpy?: ReturnType<typeof vi.fn>;
} = {};

// Fake OpenAI SDK; `responses.create()` forwards its arguments to whatever
// spy is currently stored in `openAiState` (swapped out per test).
vi.mock("openai", () => {
  class FakeOpenAI {
    public responses = {
      create: (...callArgs: Array<any>) => {
        return openAiState.createSpy!(...callArgs);
      },
    };
  }

  class APIConnectionTimeoutError extends Error {}

  const mockedModule = {
    __esModule: true,
    default: FakeOpenAI,
    APIConnectionTimeoutError,
  };
  return mockedModule;
});
// Approval and formatting helpers are not relevant here; stub them.
vi.mock("@lib/approvals.js", () => {
  return {
    __esModule: true,
    alwaysApprovedCommands: new Set<string>(),
    canAutoApprove: () => {
      return { type: "auto-approve", runInSandbox: false } as any;
    },
    isSafeCommand: () => null,
  };
});

vi.mock("@lib/format-command.js", () => {
  return {
    __esModule: true,
    formatCommandForDisplay: (argv: Array<string>) => argv.join(" "),
  };
});

// Silence debug logging from the agent loop.
vi.mock("../src/utils/agent/log.js", () => {
  return {
    __esModule: true,
    log: () => {},
    isLoggingEnabled: () => false,
  };
});
import { AgentLoop } from "../src/utils/agent/agent-loop.js";
describe("AgentLoop network resilience", () => {
  it("retries once on APIConnectionTimeoutError and succeeds", async () => {
    // Arrange the fake OpenAI: the first call throws
    // APIConnectionTimeoutError, the second returns a short stream.
    const { APIConnectionTimeoutError } = await import("openai");
    let call = 0;
    openAiState.createSpy = vi.fn(async () => {
      call += 1;
      if (call === 1) {
        throw new APIConnectionTimeoutError({ message: "timeout" });
      }
      // Second attempt: minimal assistant reply.
      return createStream([
        {
          type: "response.output_item.done",
          item: {
            type: "message",
            role: "assistant",
            id: "m1",
            content: [{ type: "text", text: "ok" }],
          },
        },
        {
          type: "response.completed",
          response: {
            id: "r1",
            status: "completed",
            output: [
              {
                type: "message",
                role: "assistant",
                id: "m1",
                content: [{ type: "text", text: "ok" }],
              },
            ],
          },
        },
      ]);
    });

    const received: Array<any> = [];
    const agent = new AgentLoop({
      model: "any",
      instructions: "",
      approvalPolicy: { mode: "auto" } as any,
      onItem: (i) => received.push(i),
      onLoading: () => {},
      getCommandConfirmation: async () => ({ review: "yes" } as any),
      onLastResponseId: () => {},
    });

    const userMsg = [
      {
        type: "message",
        role: "user",
        content: [{ type: "input_text", text: "hi" }],
      },
    ];
    await agent.run(userMsg as any);

    // Wait a tick for flush.
    await new Promise((r) => setTimeout(r, 20));

    // One failed attempt plus one successful retry.
    expect(openAiState.createSpy).toHaveBeenCalledTimes(2);
    const assistant = received.find((i) => i.role === "assistant");
    expect(assistant).toBeTruthy();
    expect(assistant.content?.[0]?.text).toBe("ok");
  });

  it("shows system message when connection closes prematurely", async () => {
    // Attach the error `code` through Object.assign so the property is typed
    // and no `@ts-ignore` suppression is needed.
    const prematureError = Object.assign(new Error("Premature close"), {
      code: "ERR_STREAM_PREMATURE_CLOSE",
    });
    // The stream yields nothing and then throws the error above.
    openAiState.createSpy = vi.fn(async () => {
      return createStream([], { throwAfter: prematureError });
    });

    const received: Array<any> = [];
    const agent = new AgentLoop({
      model: "any",
      instructions: "",
      approvalPolicy: { mode: "auto" } as any,
      onItem: (i) => received.push(i),
      onLoading: () => {},
      getCommandConfirmation: async () => ({ review: "yes" } as any),
      onLastResponseId: () => {},
    });

    const userMsg = [
      {
        type: "message",
        role: "user",
        content: [{ type: "input_text", text: "hi" }],
      },
    ];
    await agent.run(userMsg as any);

    // Wait a tick.
    await new Promise((r) => setTimeout(r, 20));

    const sysMsg = received.find(
      (i) =>
        i.role === "system" &&
        i.content?.[0]?.text?.includes("Connection closed prematurely"),
    );
    expect(sysMsg).toBeTruthy();
  });
});

View File

@@ -0,0 +1,141 @@
import { mkdtempSync, rmSync, writeFileSync, mkdirSync } from "fs";
import { tmpdir } from "os";
import { join } from "path";
import { describe, expect, it, vi, beforeEach, afterEach } from "vitest";
// ---------------------------------------------------------------------------
// Test helpers & mocks
// ---------------------------------------------------------------------------
/**
 * Fake stream returned from the mocked OpenAI SDK. It is async-iterable and
 * yields a single `response.completed` event with an empty `output`, which
 * per the original note is all the AgentLoop needs for the turn to finish.
 */
class FakeStream {
  public controller = { abort: vi.fn() };

  async *[Symbol.asyncIterator]() {
    const completion = {
      type: "response.completed",
      response: { id: "r1", status: "completed", output: [] },
    };
    yield completion as any;
  }
}
// Most recent parameters passed to `openai.responses.create()`, kept so the
// test can assert on the `instructions` value.
let lastCreateParams: any = null;

// Fake OpenAI SDK: records the request parameters and answers with a
// FakeStream.
vi.mock("openai", () => {
  class APIConnectionTimeoutError extends Error {}

  class FakeOpenAI {
    public responses = {
      create: async (requestParams: any) => {
        lastCreateParams = requestParams;
        return new FakeStream();
      },
    };
  }

  const mockedModule = {
    __esModule: true,
    default: FakeOpenAI,
    APIConnectionTimeoutError,
  };
  return mockedModule;
});
// The approval helpers decide whether a command can be auto-approved. None of
// that matters for this test, so the module is stubbed with minimal
// implementations.
vi.mock("@lib/approvals.js", () => ({
  __esModule: true,
  alwaysApprovedCommands: new Set<string>(),
  canAutoApprove: () => ({ type: "auto-approve", runInSandbox: false } as any),
  isSafeCommand: () => null,
}));

// Command formatter stub: space-join the argv entries.
vi.mock("@lib/format-command.js", () => ({
  __esModule: true,
  formatCommandForDisplay: (argv: Array<string>) => argv.join(" "),
}));

// Stub the file-based logger to avoid side effects and keep the test output
// clean.
vi.mock("../src/utils/agent/log.js", () => {
  return {
    __esModule: true,
    log: () => {},
    isLoggingEnabled: () => false,
  };
});
// ---------------------------------------------------------------------------
// After mocks are in place we can import the modules under test.
// ---------------------------------------------------------------------------
import { AgentLoop } from "../src/utils/agent/agent-loop.js";
import { loadConfig } from "../src/utils/config.js";
// ---------------------------------------------------------------------------
// Path of the throw-away project directory used by the current test.
let projectDir: string;

beforeEach(() => {
  // Fresh temporary directory; the `.git` sub-directory marks it as the
  // project root.
  const root = mkdtempSync(join(tmpdir(), "codex-proj-"));
  projectDir = root;
  mkdirSync(join(root, ".git"));

  // Small project doc whose text we expect to be included in the prompt.
  writeFileSync(join(root, "codex.md"), "# Test Project\nHello docs!\n");

  // Forget SDK params captured by a previous test.
  lastCreateParams = null;
});

afterEach(() => {
  // Delete the temporary directory and everything inside it.
  rmSync(projectDir, { recursive: true, force: true });
});
describe("AgentLoop", () => {
  it("passes codex.md contents through the instructions parameter", async () => {
    const loadedConfig = loadConfig(undefined, undefined, { cwd: projectDir });

    // Sanity check that loadConfig picked up the project doc. This is *not*
    // the main assertion; it only guards against a false positive when the
    // fixture setup is wrong.
    expect(loadedConfig.instructions).toContain("Hello docs!");

    const noop = () => {};
    const agent = new AgentLoop({
      model: "o3-mini", // arbitrary
      instructions: loadedConfig.instructions,
      config: loadedConfig,
      approvalPolicy: { mode: "suggest" } as any,
      onItem: noop,
      onLoading: noop,
      getCommandConfirmation: async () => ({ review: "yes" } as any),
      onLastResponseId: noop,
    });

    // Run a single turn to completion; the fake OpenAI client answers with a
    // stream that completes right away.
    await agent.run([
      {
        type: "message",
        role: "user",
        content: [{ type: "input_text", text: "ping" }],
      },
    ]);

    // The SDK must have been called, and the instructions seen at that point
    // must still contain the project doc text.
    expect(lastCreateParams).not.toBeNull();
    expect(lastCreateParams.instructions).toContain("Hello docs!");
  });
});

View File

@@ -0,0 +1,127 @@
import { describe, it, expect, vi } from "vitest";
// ---------------------------------------------------------------------------
// Utility: fake OpenAI SDK with programmable behaviour per test case.
// ---------------------------------------------------------------------------
// Same helper as used in agent-network-errors.test.ts, duplicated here to keep
// the test file self-contained.
// Exported so that the strict TypeScript compiler does not flag it as unused;
// individual tests may import it for ad-hoc diagnostics when debugging.
//
// Returns an object with a spy-backed `controller.abort` whose async iterator
// yields the given events in order.
export function _createStream(events: Array<any>) {
  class ReplayStream {
    public controller = { abort: vi.fn() };

    async *[Symbol.asyncIterator]() {
      for (let idx = 0; idx < events.length; idx += 1) {
        yield events[idx];
      }
    }
  }
  return new ReplayStream();
}
// Shared holder through which each test installs the spy that backs
// `responses.create` on the mocked client.
const openAiState: { createSpy?: ReturnType<typeof vi.fn> } = {};

vi.mock("openai", () => {
  // Re-export the timeout error as well so other tests that expect it
  // continue to work regardless of execution order.
  class APIConnectionTimeoutError extends Error {}

  // Error subclass carrying the `rate_limit_exceeded` code.
  class RateLimitError extends Error {
    public code = "rate_limit_exceeded";

    constructor(message: string) {
      super(message);
      this.name = "RateLimitError";
    }
  }

  class FakeOpenAI {
    public responses = {
      // Forwards to whatever spy the current test has installed.
      create: (...callArgs: Array<any>) => openAiState.createSpy!(...callArgs),
    };
  }

  const mockedModule = {
    __esModule: true,
    default: FakeOpenAI,
    RateLimitError,
    APIConnectionTimeoutError,
  };
  return mockedModule;
});
// Stub approvals / formatting helpers, which are not relevant to rate-limit
// handling.
vi.mock("@lib/approvals.js", () => {
  return {
    __esModule: true,
    alwaysApprovedCommands: new Set<string>(),
    canAutoApprove: () =>
      ({ type: "auto-approve", runInSandbox: false } as any),
    isSafeCommand: () => null,
  };
});

vi.mock("@lib/format-command.js", () => {
  return {
    __esModule: true,
    formatCommandForDisplay: (argv: Array<string>) => argv.join(" "),
  };
});

// Silence debug logging from agent-loop so test output remains clean.
vi.mock("../src/utils/agent/log.js", () => {
  return {
    __esModule: true,
    log: () => {},
    isLoggingEnabled: () => false,
  };
});
import { AgentLoop } from "../src/utils/agent/agent-loop.js";
describe("AgentLoop OpenAI rate limit errors", () => {
  it("surfaces a userfriendly system message instead of throwing on RateLimitError (TDD expected to fail)", async () => {
    const rateLimitErrMsg =
      "Rate limit reached: Limit 20, Used 20, Requested 1. Please try again.";

    // Arrange the fake OpenAI so that every call rejects with a rate-limit
    // error before any streaming begins (in real life: an HTTP 429).
    openAiState.createSpy = vi.fn(async () => {
      throw Object.assign(new Error(rateLimitErrMsg), {
        code: "rate_limit_exceeded",
      });
    });

    const emitted: Array<any> = [];
    const agent = new AgentLoop({
      model: "any",
      instructions: "",
      approvalPolicy: { mode: "auto" } as any,
      onItem: (item) => emitted.push(item),
      onLoading: () => {},
      getCommandConfirmation: async () => ({ review: "yes" } as any),
      onLastResponseId: () => {},
    });

    const userMsg = [
      {
        type: "message",
        role: "user",
        content: [{ type: "input_text", text: "hello" }],
      },
    ];

    // Desired behaviour: AgentLoop catches the rate-limit error, emits a
    // helpful system message and resolves without throwing so callers can let
    // the user retry.
    await expect(agent.run(userMsg as any)).resolves.not.toThrow();

    // Give pending flush timers a chance to run.
    await new Promise((r) => setTimeout(r, 20));

    // A system item whose first content entry mentions "Rate limit".
    const isRateLimitNotice = (item: any) => {
      if (item.role !== "system") {
        return false;
      }
      const text = item.content?.[0]?.text;
      return typeof text === "string" && text.includes("Rate limit");
    };
    expect(emitted.find(isRateLimitNotice)).toBeTruthy();
  });
});

View File

@@ -0,0 +1,166 @@
import { describe, it, expect, vi } from "vitest";
// Utility for the fake OpenAI SDK below, which can be instructed to fail with
// 5xx a set number of times before succeeding.
//
// Returns an object with a spy-backed `controller.abort` whose async iterator
// yields the given events in order.
function createStream(events: Array<any>) {
  class ReplayStream {
    public controller = { abort: vi.fn() };

    async *[Symbol.asyncIterator]() {
      for (let idx = 0; idx < events.length; idx += 1) {
        yield events[idx];
      }
    }
  }
  return new ReplayStream();
}
// Shared holder through which each test installs the spy that backs
// `responses.create` on the mocked client.
const openAiState: { createSpy?: ReturnType<typeof vi.fn> } = {};

vi.mock("openai", () => {
  // Empty Error subclass exported under the name `APIConnectionTimeoutError`.
  class APIConnectionTimeoutError extends Error {}

  class FakeOpenAI {
    public responses = {
      // Forwards to whatever spy the current test has installed.
      create: (...callArgs: Array<any>) => openAiState.createSpy!(...callArgs),
    };
  }

  const mockedModule = {
    __esModule: true,
    default: FakeOpenAI,
    APIConnectionTimeoutError,
  };
  return mockedModule;
});
// Approval helpers stubbed so every command is reported as auto-approvable.
vi.mock("@lib/approvals.js", () => {
  return {
    __esModule: true,
    alwaysApprovedCommands: new Set<string>(),
    canAutoApprove: () =>
      ({ type: "auto-approve", runInSandbox: false } as any),
    isSafeCommand: () => null,
  };
});

// Display formatting is reduced to joining the argv with spaces.
vi.mock("@lib/format-command.js", () => {
  return {
    __esModule: true,
    formatCommandForDisplay: (argv: Array<string>) => argv.join(" "),
  };
});

// Logger replaced with no-ops.
vi.mock("../src/utils/agent/log.js", () => {
  return {
    __esModule: true,
    log: () => {},
    isLoggingEnabled: () => false,
  };
});
import { AgentLoop } from "../src/utils/agent/agent-loop.js";
describe("AgentLoop automatic retry on 5xx errors", () => {
  // Builds a fresh assistant "ok" message item (a new object on every call).
  const okMessage = () => ({
    type: "message",
    role: "assistant",
    id: "m1",
    content: [{ type: "text", text: "ok" }],
  });

  // Builds an "Internal Server Error" carrying the given HTTP status.
  const serverError = (status: number) => {
    const err: any = new Error("Internal Server Error");
    err.status = status;
    return err;
  };

  // Single-message user input with the given text.
  const userTurn = (text: string) => [
    {
      type: "message",
      role: "user",
      content: [{ type: "input_text", text }],
    },
  ];

  // AgentLoop wired to push every emitted item into `sink`.
  const makeAgent = (sink: Array<any>) =>
    new AgentLoop({
      model: "any",
      instructions: "",
      approvalPolicy: { mode: "auto" } as any,
      onItem: (item) => sink.push(item),
      onLoading: () => {},
      getCommandConfirmation: async () => ({ review: "yes" } as any),
      onLastResponseId: () => {},
    });

  // Short real-time pause so pending flush timers can run.
  const settle = () => new Promise((r) => setTimeout(r, 20));

  it("retries up to 3 times then succeeds", async () => {
    // The first two calls fail with 500; the third returns a stream.
    let attempts = 0;
    openAiState.createSpy = vi.fn(async () => {
      attempts += 1;
      if (attempts > 2) {
        return createStream([
          { type: "response.output_item.done", item: okMessage() },
          {
            type: "response.completed",
            response: {
              id: "r1",
              status: "completed",
              output: [okMessage()],
            },
          },
        ]);
      }
      throw serverError(500);
    });

    const emitted: Array<any> = [];
    const agent = makeAgent(emitted);

    await agent.run(userTurn("hi") as any);
    await settle();

    expect(openAiState.createSpy).toHaveBeenCalledTimes(3);
    const assistant = emitted.find((item) => item.role === "assistant");
    expect(assistant?.content?.[0]?.text).toBe("ok");
  });

  it("fails after 3 attempts and surfaces system message", async () => {
    // Every call fails; any 5xx status will do.
    openAiState.createSpy = vi.fn(async () => {
      throw serverError(502);
    });

    const emitted: Array<any> = [];
    const agent = makeAgent(emitted);

    await expect(agent.run(userTurn("hello") as any)).resolves.not.toThrow();
    await settle();

    expect(openAiState.createSpy).toHaveBeenCalledTimes(3);

    // A system item whose first content entry mentions "Network error".
    const isNetworkNotice = (item: any) => {
      if (item.role !== "system") {
        return false;
      }
      const text = item.content?.[0]?.text;
      return typeof text === "string" && text.includes("Network error");
    };
    expect(emitted.find(isNetworkNotice)).toBeTruthy();
  });
});

View File

@@ -0,0 +1,171 @@
import { describe, it, expect, vi } from "vitest";
// --- OpenAI stream mock ----------------------------------------------------
// Yields a `shell` function call right away, followed by a completion event
// echoing the same call, so the test can check that the subsequent
// function_call_output never gets surfaced after terminate().
class FakeStream {
  public controller = { abort: vi.fn() };

  async *[Symbol.asyncIterator]() {
    // Fresh function_call item on every invocation.
    const shellCall = () => ({
      type: "function_call",
      id: "callterminate1",
      name: "shell",
      arguments: JSON.stringify({ cmd: ["sleep", "5"] }),
    });

    // Immediately ask for the shell function call.
    yield {
      type: "response.output_item.done",
      item: shellCall(),
    } as any;

    // Turn completion echoing the same function call.
    yield {
      type: "response.completed",
      response: {
        id: "respterminate1",
        status: "completed",
        output: [shellCall()],
      },
    } as any;
  }
}
// Fake OpenAI client whose `responses.create` always returns a FakeStream.
vi.mock("openai", () => {
  class APIConnectionTimeoutError extends Error {}

  class FakeOpenAI {
    public responses = {
      create: async () => {
        return new FakeStream();
      },
    };
  }

  const mockedModule = {
    __esModule: true,
    default: FakeOpenAI,
    APIConnectionTimeoutError,
  };
  return mockedModule;
});
// --- Helpers referenced by handle-exec-command -----------------------------
vi.mock("@lib/approvals.js", () => ({
  __esModule: true,
  alwaysApprovedCommands: new Set<string>(),
  canAutoApprove: () => {
    return { type: "auto-approve", runInSandbox: false } as any;
  },
  isSafeCommand: () => null,
}));

vi.mock("@lib/format-command.js", () => ({
  __esModule: true,
  formatCommandForDisplay: (argv: Array<string>) => argv.join(" "),
}));

// Stub the logger to avoid filesystem side effects.
vi.mock("../src/utils/agent/log.js", () => {
  return {
    __esModule: true,
    log: () => {},
    isLoggingEnabled: () => false,
  };
});
// After dependency mocks we can import the modules under test.
import { AgentLoop } from "../src/utils/agent/agent-loop.js";
import * as handleExec from "../src/utils/agent/handle-exec-command.js";
describe("Agent terminate (hard cancel)", () => {
  it("suppresses function_call_output and stops processing once terminate() is invoked", async () => {
    // Simulate a long-running exec that would normally resolve with output.
    // The spy is kept so it can be restored afterwards; otherwise the stubbed
    // implementation would stay installed for any test that runs later.
    const execSpy = vi
      .spyOn(handleExec, "handleExecCommand")
      .mockImplementation(
        async (_args, _config, _policy, _getConf, abortSignal) => {
          // Wait until the abort signal fires or 2s elapse (whichever comes
          // first).
          await new Promise<void>((resolve) => {
            if (abortSignal?.aborted) {
              return resolve();
            }
            const timer = setTimeout(resolve, 2000);
            abortSignal?.addEventListener(
              "abort",
              () => {
                clearTimeout(timer);
                resolve();
              },
              // The listener is only needed for the first abort event.
              { once: true },
            );
          });
          return { outputText: "shouldnothappen", metadata: {} } as any;
        },
      );

    try {
      const received: Array<any> = [];
      const agent = new AgentLoop({
        model: "any",
        instructions: "",
        config: { model: "any", instructions: "" },
        approvalPolicy: { mode: "auto" } as any,
        onItem: (item) => received.push(item),
        onLoading: () => {},
        getCommandConfirmation: async () => ({ review: "yes" } as any),
        onLastResponseId: () => {},
      });

      const userMsg = [
        {
          type: "message",
          role: "user",
          content: [{ type: "input_text", text: "run long cmd" }],
        },
      ];

      // Start the agent loop but deliberately do not wait for completion.
      agent.run(userMsg as any);

      // Give it a brief moment to start and process the function_call.
      await new Promise((r) => setTimeout(r, 10));

      agent.terminate();

      // Allow promises to settle.
      await new Promise((r) => setTimeout(r, 50));

      const hasOutput = received.some((i) => i.type === "function_call_output");
      expect(hasOutput).toBe(false);
    } finally {
      // Always put the real handleExecCommand back, even if an assertion
      // above failed.
      execSpy.mockRestore();
    }
  });

  it("rejects further run() calls after terminate()", async () => {
    const agent = new AgentLoop({
      model: "any",
      instructions: "",
      config: { model: "any", instructions: "" },
      approvalPolicy: { mode: "auto" } as any,
      onItem: () => {},
      onLoading: () => {},
      getCommandConfirmation: async () => ({ review: "yes" } as any),
      onLastResponseId: () => {},
    });

    agent.terminate();

    const dummyMsg = [
      {
        type: "message",
        role: "user",
        content: [{ type: "input_text", text: "noop" }],
      },
    ];

    let threw = false;
    try {
      // We expect this to fail fast, either by throwing synchronously or by
      // returning a rejected promise; `await` inside `try` catches both.
      await agent.run(dummyMsg as any);
    } catch {
      threw = true;
    }
    expect(threw).toBe(true);
  });
});

View File

@@ -0,0 +1,173 @@
// ---------------------------------------------------------------------------
// Regression test for the "thinking time" counter. Today the implementation
// keeps a *single* start time across many requests, which means that every
// subsequent command will show an ever-increasing number such as
// "thinking for 4409s", "thinking for 4424s", … even though the individual
// turn only took a couple of milliseconds. Each request should start its own
// independent timer.
//
// We mark the spec with `.fails()` so that the overall suite remains green
// until the underlying bug is fixed. When the implementation is corrected the
// expectations below will turn green; Vitest will then error and remind us to
// remove the `.fails` flag.
// ---------------------------------------------------------------------------
import { AgentLoop } from "../src/utils/agent/agent-loop.js";
import { describe, it, expect, vi } from "vitest";
// --- OpenAI mock -----------------------------------------------------------
/**
 * Fake stream that waits for a configurable delay and then yields a single
 * `response.completed` event. Different delays let successive requests
 * simulate different thinking times while Vitest's fake timers are in use.
 */
class FakeStream {
  public controller = { abort: vi.fn() };
  /** Delay before the completion event, in milliseconds. */
  private delay: number;

  constructor(delay: number) {
    this.delay = delay;
  }

  async *[Symbol.asyncIterator]() {
    const waitMs = this.delay;
    if (waitMs > 0) {
      // Wait the configured delay; fake timers will fast-forward it.
      await new Promise((r) => setTimeout(r, waitMs));
    }

    const assistantDone = {
      type: "message",
      role: "assistant",
      id: "m1",
      content: [{ type: "text", text: "done" }],
    };
    yield {
      type: "response.completed",
      response: {
        id: `resp-${Date.now()}`,
        status: "completed",
        output: [assistantDone],
      },
    } as any;
  }
}
/**
 * Fake OpenAI client that returns a slower stream for the *first* call and a
 * faster one for every later call, so the tests can verify that per-task
 * timers reset while the global counter accumulates.
 */
vi.mock("openai", () => {
  // Number of `responses.create` calls seen so far.
  let requests = 0;

  class APIConnectionTimeoutError extends Error {}

  class FakeOpenAI {
    public responses = {
      create: async () => {
        requests += 1;
        const delayMs = requests === 1 ? 10_000 : 500; // 10s vs 0.5s
        return new FakeStream(delayMs);
      },
    };
  }

  const mockedModule = {
    __esModule: true,
    default: FakeOpenAI,
    APIConnectionTimeoutError,
  };
  return mockedModule;
});
// Stub helpers referenced indirectly so we do not pull in real FS/network.
vi.mock("@lib/approvals.js", () => {
  return {
    __esModule: true,
    isSafeCommand: () => null,
  };
});

vi.mock("@lib/format-command.js", () => {
  return {
    __esModule: true,
    formatCommandForDisplay: (argv: Array<string>) => argv.join(" "),
  };
});

// Suppress filesystem logging in tests.
vi.mock("../src/utils/agent/log.js", () => {
  return {
    __esModule: true,
    log: () => {},
    isLoggingEnabled: () => false,
  };
});
describe("thinking time counter", () => {
  // Use fake timers for *all* tests in this suite.
  vi.useFakeTimers();

  // Collects every item passed to onItem during a scenario run.
  let items: Array<any>;

  // Runs two agent turns (10s + 0.5s of simulated delay) and fills `items`.
  async function runScenario() {
    items = [];
    const agent = new AgentLoop({
      config: {} as any,
      model: "any",
      instructions: "",
      approvalPolicy: { mode: "auto" } as any,
      onItem: (item) => items.push(item),
      onLoading: () => {},
      getCommandConfirmation: async () => ({ review: "yes" } as any),
      onLastResponseId: () => {},
    });
    const userMsg = {
      type: "message",
      role: "user",
      content: [{ type: "input_text", text: "do it" }],
    } as any;

    // First request: simulated 10s thinking time. The run is intentionally
    // not awaited; the fake clock is advanced instead.
    agent.run([userMsg]);
    await vi.advanceTimersByTimeAsync(11_000); // 10s + flush margin

    // Second request: simulated 0.5s thinking time.
    agent.run([userMsg]);
    await vi.advanceTimersByTimeAsync(1_000); // 0.5s + flush margin
  }

  // System items whose first content text starts with `prefix`.
  const systemMsgsStartingWith = (prefix: string) =>
    items.filter(
      (item) =>
        item.role === "system" && item.content?.[0]?.text?.startsWith(prefix),
    );

  // Extracts the integer captured by `pattern` from each message (NaN when
  // the pattern does not match).
  const secondsFrom = (msgs: Array<any>, pattern: RegExp) =>
    msgs.map((msg) => {
      const match = msg.content[0].text.match(pattern);
      return match ? parseInt(match[1]!, 10) : NaN;
    });

  // Registered with `it.fails`: the assertions are currently expected to fail.
  it.fails("reports correct per-task thinking time per command", async () => {
    await runScenario();

    const perTaskMsgs = systemMsgsStartingWith("🤔 Thinking time:");
    expect(perTaskMsgs.length).toBe(2);

    const perTaskDurations = secondsFrom(
      perTaskMsgs,
      /Thinking time: (\d+) s/,
    );

    // First run ~10s, second run ~0.5s.
    expect(perTaskDurations[0]).toBeGreaterThanOrEqual(9);
    expect(perTaskDurations[1]).toBeLessThan(3);
  });

  // Registered with `it.fails`: the assertions are currently expected to fail.
  it.fails("reports correct global thinking time accumulation", async () => {
    await runScenario();

    const globalMsgs = systemMsgsStartingWith("⏱ Total thinking time:");
    expect(globalMsgs.length).toBe(2);

    const [afterFirst, afterSecond] = secondsFrom(
      globalMsgs,
      /Total thinking time: (\d+) s/,
    );

    // The total after the second run should exceed the total after the first.
    expect(afterSecond as number).toBeGreaterThan(afterFirst!);
  });
});

View File

@@ -0,0 +1,35 @@
import { describe, it, expect, beforeEach, afterEach } from "vitest";
// The module under test is imported *lazily* inside each test so that the
// OPENAI_API_KEY env var can be controlled independently per test case;
// Node's module cache would otherwise capture the value present during the
// first import.

// Value of the env var when this file was loaded, restored after every test.
const ORIGINAL_ENV_KEY = process.env["OPENAI_API_KEY"];

beforeEach(() => {
  // Every test starts without an API key in the environment.
  delete process.env["OPENAI_API_KEY"];
});

afterEach(() => {
  if (ORIGINAL_ENV_KEY === undefined) {
    delete process.env["OPENAI_API_KEY"];
    return;
  }
  process.env["OPENAI_API_KEY"] = ORIGINAL_ENV_KEY;
});
describe("config.setApiKey", () => {
  it("overrides the exported OPENAI_API_KEY at runtime", async () => {
    // First import: with the env var removed, the exported key is empty.
    const firstLoad = await import("../src/utils/config.js");
    expect(firstLoad.OPENAI_API_KEY).toBe("");

    firstLoad.setApiKey("mykey");

    // Import again and read the export afresh to observe the new value.
    const secondLoad = await import("../src/utils/config.js");
    expect(secondLoad.OPENAI_API_KEY).toBe("mykey");
  });
});

View File

@@ -0,0 +1,318 @@
import {
ActionType,
apply_commit,
assemble_changes,
DiffError,
identify_files_added,
identify_files_needed,
load_files,
patch_to_commit,
process_patch,
text_to_patch,
} from "../src/utils/agent/apply-patch.js";
import { test, expect } from "vitest";
/**
 * Builds a tiny in-memory file system for the patch tests.
 *
 * `files` starts as a shallow copy of `initialFiles`, so the caller's object
 * is never mutated. `writes` records every path written (with its latest
 * content) and `removals` lists removed paths in call order.
 *
 * @param initialFiles Path-to-content map used to seed the store.
 * @returns The open/write/remove callbacks plus the `writes`, `removals` and
 *   `files` records they operate on.
 */
function createInMemoryFS(initialFiles: Record<string, string>) {
  const files: Record<string, string> = { ...initialFiles };
  const writes: Record<string, string> = {};
  const removals: Array<string> = [];

  // Returns the stored content, or throws when the path holds no string.
  function openFn(p: string): string {
    const contents = files[p];
    if (typeof contents !== "string") {
      throw new Error(`File not found: ${p}`);
    }
    return contents;
  }

  // Stores the content and remembers that this path was written.
  function writeFn(p: string, content: string): void {
    writes[p] = content;
    files[p] = content;
  }

  // Drops the path from the store and logs the removal.
  function removeFn(p: string): void {
    removals.push(p);
    delete files[p];
  }

  return { openFn, writeFn, removeFn, writes, removals, files };
}
// Updating an existing file rewrites its content and removes nothing.
test("process_patch - update file", () => {
  const patchText = [
    "*** Begin Patch",
    "*** Update File: a.txt",
    "@@",
    "-hello",
    "+hello world",
    "*** End Patch",
  ].join("\n");
  const { openFn, writeFn, removeFn, writes, removals } = createInMemoryFS({
    "a.txt": "hello",
  });

  const outcome = process_patch(patchText, openFn, writeFn, removeFn);

  expect(outcome).toBe("Done!");
  expect(writes).toEqual({ "a.txt": "hello world" });
  expect(removals).toEqual([]);
});
// Adding a file writes the new content and removes nothing.
test("process_patch - add file", () => {
  const patchText = [
    "*** Begin Patch",
    "*** Add File: b.txt",
    "+new content",
    "*** End Patch",
  ].join("\n");
  const { openFn, writeFn, removeFn, writes, removals } = createInMemoryFS({});

  process_patch(patchText, openFn, writeFn, removeFn);

  expect(writes).toEqual({ "b.txt": "new content" });
  expect(removals).toEqual([]);
});
// Deleting a file records a removal and writes nothing.
test("process_patch - delete file", () => {
  const patchText = [
    "*** Begin Patch",
    "*** Delete File: c.txt",
    "*** End Patch",
  ].join("\n");
  const { openFn, writeFn, removeFn, writes, removals } = createInMemoryFS({
    "c.txt": "to be removed",
  });

  process_patch(patchText, openFn, writeFn, removeFn);

  expect(writes).toEqual({});
  expect(removals).toEqual(["c.txt"]);
});
// Updated and deleted files are "needed"; added files are reported separately.
test("identify_files_needed & identify_files_added", () => {
  const patchText = [
    "*** Begin Patch",
    "*** Update File: a.txt",
    "*** Delete File: b.txt",
    "*** Add File: c.txt",
    "*** End Patch",
  ].join("\n");

  // Sort both sides so the comparison does not depend on result order.
  const needed = identify_files_needed(patchText);
  needed.sort();
  const wanted = ["a.txt", "b.txt"];
  wanted.sort();
  expect(needed).toEqual(wanted);

  expect(identify_files_added(patchText)).toEqual(["c.txt"]);
});
// One update section that both replaces a line and inserts a new one.
test("process_patch - update file with multiple chunks", () => {
  const original = ["line1", "line2", "line3", "line4"].join("\n");
  const patchText = [
    "*** Begin Patch",
    "*** Update File: multi.txt",
    "@@",
    "line1",
    "-line2",
    "+line2 updated",
    "line3",
    "+inserted line",
    "line4",
    "*** End Patch",
  ].join("\n");
  const { openFn, writeFn, removeFn, writes, removals } = createInMemoryFS({
    "multi.txt": original,
  });

  process_patch(patchText, openFn, writeFn, removeFn);

  const expected = [
    "line1",
    "line2 updated",
    "line3",
    "inserted line",
    "line4",
  ].join("\n");
  expect(writes).toEqual({ "multi.txt": expected });
  expect(removals).toEqual([]);
});
// An update with "Move to" writes the new path and removes the old one.
test("process_patch - move file (rename)", () => {
  const patchText = [
    "*** Begin Patch",
    "*** Update File: old.txt",
    "*** Move to: new.txt",
    "@@",
    "-old",
    "+new",
    "*** End Patch",
  ].join("\n");
  const { openFn, writeFn, removeFn, writes, removals } = createInMemoryFS({
    "old.txt": "old",
  });

  process_patch(patchText, openFn, writeFn, removeFn);

  expect(writes).toEqual({ "new.txt": "new" });
  expect(removals).toEqual(["old.txt"]);
});
// A single patch that adds one file, updates another and deletes a third.
test("process_patch - combined add, update, delete", () => {
  const patchText = [
    "*** Begin Patch",
    "*** Add File: added.txt",
    "+added contents",
    "*** Update File: upd.txt",
    "@@",
    "-old value",
    "+new value",
    "*** Delete File: del.txt",
    "*** End Patch",
  ].join("\n");
  const seedFiles = {
    "upd.txt": "old value",
    "del.txt": "delete me",
  };
  const { openFn, writeFn, removeFn, writes, removals } =
    createInMemoryFS(seedFiles);

  process_patch(patchText, openFn, writeFn, removeFn);

  const expectedWrites = {
    "added.txt": "added contents",
    "upd.txt": "new value",
  };
  expect(writes).toEqual(expectedWrites);
  expect(removals).toEqual(["del.txt"]);
});
// Appends two lines (an empty line and "hello") after the closing code fence
// of a README-style snippet. The template literals below are
// whitespace-sensitive: every character is part of the fixture.
test("process_patch - readme edit", () => {
  const original = `
#### Fix an issue
\`\`\`sh
# First, copy an error
# Then, start codex with interactive mode
codex
# Or you can pass in via command line argument
codex "Fix this issue: $(pbpaste)"
# Or even as a task (it should use your current repo and branch)
codex -t "Fix this issue: $(pbpaste)"
\`\`\`
`;
  // The bare "+" line adds an empty line; "+hello" adds the text line.
  const patch = `*** Begin Patch
*** Update File: README.md
@@
codex -t "Fix this issue: $(pbpaste)"
\`\`\`
+
+hello
*** End Patch`;
  // Must contain BOTH added lines: the empty one and "hello".
  const expected = `
#### Fix an issue
\`\`\`sh
# First, copy an error
# Then, start codex with interactive mode
codex
# Or you can pass in via command line argument
codex "Fix this issue: $(pbpaste)"
# Or even as a task (it should use your current repo and branch)
codex -t "Fix this issue: $(pbpaste)"
\`\`\`

hello
`;
  const fs = createInMemoryFS({ "README.md": original });
  process_patch(patch, fs.openFn, fs.writeFn, fs.removeFn);
  expect(fs.writes).toEqual({ "README.md": expected });
});
// Updating a file that is absent from the store must raise a DiffError.
test("process_patch - invalid patch throws DiffError", () => {
  const patchText = [
    "*** Begin Patch",
    "*** Update File: missing.txt",
    "@@",
    "+something",
    "*** End Patch",
  ].join("\n");
  const { openFn, writeFn, removeFn } = createInMemoryFS({});

  const applyPatch = () => process_patch(patchText, openFn, writeFn, removeFn);

  expect(applyPatch).toThrow(DiffError);
});
// The keep line "line3" lacks its leading space and must still be accepted.
test("process_patch - tolerates omitted space for keep line", () => {
  const original = ["line1", "line2", "line3"].join("\n");
  const patchText = [
    "*** Begin Patch",
    "*** Update File: foo.txt",
    "@@",
    " line1",
    "-line2",
    "+some new line2",
    "line3",
    "*** End Patch",
  ].join("\n");
  const { openFn, writeFn, removeFn, files } = createInMemoryFS({
    "foo.txt": original,
  });

  process_patch(patchText, openFn, writeFn, removeFn);

  expect(files["foo.txt"]).toBe("line1\nsome new line2\nline3");
});
// Compares a before/after snapshot and checks the change recorded per file.
test("assemble_changes correctly detects add, update and delete", () => {
  const before = {
    "a.txt": "old",
    "b.txt": "keep",
    "c.txt": "remove",
  };
  const after = {
    "a.txt": "new", // update
    "b.txt": "keep", // unchanged, should be ignored
    "c.txt": undefined as unknown as string, // delete
    "d.txt": "created", // add
  };

  const changes = assemble_changes(before, after).changes;

  const expectedUpdate = {
    type: ActionType.UPDATE,
    old_content: "old",
    new_content: "new",
  };
  const expectedDelete = {
    type: ActionType.DELETE,
    old_content: "remove",
  };
  const expectedAdd = {
    type: ActionType.ADD,
    new_content: "created",
  };
  expect(changes["a.txt"]).toEqual(expectedUpdate);
  expect(changes["c.txt"]).toEqual(expectedDelete);
  expect(changes["d.txt"]).toEqual(expectedAdd);

  // Unchanged files should not appear in the commit.
  expect(changes).not.toHaveProperty("b.txt");
});
// Parses a patch that updates one file and adds another, then converts the
// parsed patch into a commit and checks both recorded changes.
test("text_to_patch + patch_to_commit handle update and add", () => {
  const originalFiles = {
    "a.txt": "old line",
  };
  const patchText = [
    "*** Begin Patch",
    "*** Update File: a.txt",
    "@@",
    "-old line",
    "+new line",
    "*** Add File: b.txt",
    "+content new",
    "*** End Patch",
  ].join("\n");

  const [parsedPatch] = text_to_patch(patchText, originalFiles);
  const changes = patch_to_commit(parsedPatch, originalFiles).changes;

  const expectedUpdate = {
    type: ActionType.UPDATE,
    old_content: "old line",
    new_content: "new line",
  };
  const expectedAdd = {
    type: ActionType.ADD,
    new_content: "content new",
  };
  expect(changes["a.txt"]).toEqual(expectedUpdate);
  expect(changes["b.txt"]).toEqual(expectedAdd);
});
// A missing path in the requested list must surface as a DiffError.
test("load_files throws DiffError when file is missing", () => {
  const memFs = createInMemoryFS({ "exists.txt": "hi" });
  // Intentionally include a file that is not in the store.
  const requested = ["exists.txt", "missing.txt"];

  const loadAll = () => load_files(requested, memFs.openFn);

  expect(loadAll).toThrow(DiffError);
});
// An UPDATE change with `move_path` writes the new path and removes the old.
test("apply_commit correctly performs move / rename operations", () => {
  const renameChange = {
    type: ActionType.UPDATE,
    old_content: "old",
    new_content: "new",
    move_path: "new.txt",
  };
  const commit = { changes: { "old.txt": renameChange } };
  const memFs = createInMemoryFS({});

  apply_commit(commit, memFs.writeFn, memFs.removeFn);

  expect(memFs.writes).toEqual({ "new.txt": "new" });
  expect(memFs.removals).toEqual(["old.txt"]);
});

View File

@@ -0,0 +1,46 @@
import { exec as rawExec } from "../src/utils/agent/sandbox/raw-exec.js";
import { describe, it, expect } from "vitest";
// Import the low-level exec implementation so we can verify that AbortSignal
// correctly terminates a spawned process. We bypass the higher-level wrappers
// to keep the test focused and fast.
describe("exec cancellation", () => {
  it("kills the child process when the abort signal is triggered", async () => {
    const controller = new AbortController();

    // Child node process that would only print "late" after 5 seconds. The
    // abort below happens long before that.
    const argv = ["node", "-e", "setTimeout(() => console.log('late'), 5000);"];

    const startedAt = Date.now();
    const pending = rawExec(argv, {}, [], controller.signal);

    // Abort almost immediately.
    controller.abort();

    const outcome = await pending;
    const elapsedMs = Date.now() - startedAt;

    // Termination should be rapid: well under the 5s the child intended to
    // run, with a generous 2s budget.
    expect(elapsedMs).toBeLessThan(2000);

    // Exit code should indicate abnormal termination (anything but zero).
    expect(outcome.exitCode).not.toBe(0);

    // The child never got a chance to print the word "late".
    expect(outcome.stdout).not.toContain("late");
  });

  it("allows the process to finish when not aborted", async () => {
    const controller = new AbortController();
    const argv = ["node", "-e", "console.log('finished')"];

    const outcome = await rawExec(argv, {}, [], controller.signal);

    expect(outcome.exitCode).toBe(0);
    expect(outcome.stdout.trim()).toBe("finished");
  });
});

View File

@@ -0,0 +1,106 @@
import type * as fsType from "fs";
import { loadConfig, saveConfig } from "../src/utils/config.js"; // parent import first
import { tmpdir } from "os";
import { join } from "path";
import { test, expect, beforeEach, afterEach, vi } from "vitest";
// In-memory FS store: maps a path to its file content.
let memfs: Record<string, string> = {};

// Mock out the parts of "fs" that our config module uses. Everything else is
// passed through from the real module.
vi.mock("fs", async () => {
  // `real` is the actual fs module.
  const real = (await vi.importActual("fs")) as typeof fsType;

  // A path exists when the store has an entry for it.
  const existsSync = (path: string) => memfs[path] !== undefined;

  // Returns the stored content, or throws for an unknown path.
  const readFileSync = (path: string) => {
    if (memfs[path] === undefined) {
      throw new Error("ENOENT");
    }
    return memfs[path];
  };

  const writeFileSync = (path: string, data: string) => {
    memfs[path] = data;
  };

  const mkdirSync = () => {
    // no-op in the in-memory store
  };

  // Deletes the entry at `path` and every entry under the `path + "/"` prefix.
  const rmSync = (path: string) => {
    const prefix = path.endsWith("/") ? path : path + "/";
    Object.keys(memfs)
      .filter((key) => key === path || key.startsWith(prefix))
      .forEach((key) => {
        delete memfs[key];
      });
  };

  return {
    ...real,
    existsSync,
    readFileSync,
    writeFileSync,
    mkdirSync,
    rmSync,
  };
});
// Paths recomputed before every test.
let testDir: string;
let testConfigPath: string;
let testInstructionsPath: string;

beforeEach(() => {
  // Start from an empty in-memory store.
  memfs = {};
  // Use the OS temp dir as our "cwd".
  const cwd = tmpdir();
  testDir = cwd;
  testConfigPath = join(cwd, "config.json");
  testInstructionsPath = join(cwd, "instructions.md");
});

afterEach(() => {
  // Drop whatever the test stored.
  memfs = {};
});
// With an empty store and project docs disabled, the defaults come back.
test("loads default config if files don't exist", () => {
  const options = { disableProjectDoc: true };
  const loaded = loadConfig(testConfigPath, testInstructionsPath, options);

  const defaults = {
    model: "o4-mini",
    instructions: "",
  };
  expect(loaded).toEqual(defaults);
});
// Round trip: what saveConfig stores must come back from loadConfig.
test("saves and loads config correctly", () => {
  const testConfig = {
    model: "test-model",
    instructions: "test instructions",
  };

  saveConfig(testConfig, testConfigPath, testInstructionsPath);

  // Our in-memory fs should now contain both files.
  const storedConfig = memfs[testConfigPath];
  const storedInstructions = memfs[testInstructionsPath];
  expect(storedConfig).toContain('"model": "test-model"');
  expect(storedInstructions).toBe("test instructions");

  const reloaded = loadConfig(testConfigPath, testInstructionsPath, {
    disableProjectDoc: true,
  });
  expect(reloaded).toEqual(testConfig);
});
// User instructions and the project doc are joined with a marker in between.
test("loads user instructions + project doc when codex.md is present", () => {
  const userInstr = "here are user instructions";
  const projectDoc = "# Project Title\n\nSome projectspecific doc";

  // 1) Seed memfs: a stored config, the user instructions, and a codex.md
  //    located in the cwd.
  memfs[testConfigPath] = JSON.stringify({ model: "mymodel" }, null, 2);
  memfs[testInstructionsPath] = userInstr;
  memfs[join(testDir, "codex.md")] = projectDoc;

  // 2) Load without disabling the project doc, with cwd=testDir.
  const cfg = loadConfig(testConfigPath, testInstructionsPath, {
    cwd: testDir,
  });

  // 3) Both pieces must be present, separated by the project-doc marker.
  const combined = [userInstr, "--- project-doc ---", projectDoc].join("\n\n");
  expect(cfg.model).toBe("mymodel");
  expect(cfg.instructions).toBe(combined);
});

View File

@@ -0,0 +1,4 @@
import { test, expect } from "vitest";
// Placeholder test that always passes.
test("dummy", () => {
  const one = 1;
  expect(one).toBe(1);
});

View File

@@ -0,0 +1,56 @@
import TextBuffer from "../src/lib/text-buffer";
import { describe, it, expect, vi } from "vitest";
/* -------------------------------------------------------------------------
* External $EDITOR integration behavioural contract
* ---------------------------------------------------------------------- */
describe("TextBuffer open in external $EDITOR", () => {
  it("replaces the buffer with the contents saved by the editor", async () => {
    // Line the fake editor appends to the file.
    const appendedLine = "* Added unit tests for external editor integration.";

    // Initial text put into the buffer.
    const initial = [
      "// TODO: draft release notes",
      "",
      "* Fixed memory leak in xyz module.",
    ].join("\n");
    const buf = new TextBuffer(initial);

    // Stub child_process.spawnSync so no real editor launches. The stub
    // treats the last argv entry as the file path and appends one line to it
    // as our faux "edit".
    const spawnSpy = vi
      .spyOn(require("node:child_process"), "spawnSync")
      .mockImplementation((_cmd, args: any) => {
        const argv = args as Array<string>;
        const targetFile = argv[argv.length - 1];
        require("node:fs").appendFileSync(targetFile, "\n" + appendedLine);
        return { status: 0 } as any;
      });

    try {
      await buf.openInExternalEditor({ editor: "nano" }); // editor param ignored in stub
    } finally {
      spawnSpy.mockRestore();
    }

    // The buffer now holds the initial text plus the appended line.
    const want = initial + "\n" + appendedLine;
    expect(buf.getText()).toBe(want);

    // Cursor should land at the *end* of the newly imported text.
    const [row, col] = buf.getCursor();
    expect(row).toBe(3); // 4th line (0-based)
    expect(col).toBe(appendedLine.length);
  });
});

View File

@@ -0,0 +1,16 @@
import { renderTui } from "./ui-test-helpers.js";
import { Markdown } from "../src/components/chat/terminal-chat-response-item.js";
import React from "react";
import { it, expect } from "vitest";
/**
 * Sanity check: the Markdown component renders bold/italic text. The frame is
 * read through `lastFrameStripped`, so only the raw words are looked for.
 */
it("renders basic markdown", () => {
  const markdownElement = <Markdown>**bold** _italic_</Markdown>;
  const { lastFrameStripped } = renderTui(markdownElement);
  const output = lastFrameStripped();
  for (const word of ["bold", "italic"]) {
    expect(output).toContain(word);
  }
});

View File

@@ -0,0 +1,70 @@
import { describe, it, expect, vi, afterEach } from "vitest";
// The model-utils module reads OPENAI_API_KEY at import time. We therefore
// need to tweak the env var *before* importing the module in each test and
// make sure the module cache is cleared.
// Snapshot of the API key taken when this file loads; afterEach restores it.
const ORIGINAL_ENV_KEY = process.env["OPENAI_API_KEY"];
// Mutable holder through which each test controls the mocked `models.list`.
const openAiState: { listSpy?: ReturnType<typeof vi.fn> } = {};
vi.mock("openai", () => {
  // Minimal stand-in for the OpenAI client: `models.list` simply forwards to
  // whichever spy the running test has stored in `openAiState`.
  class FakeOpenAI {
    public models = {
      list: (...forwarded: Array<any>) => {
        return openAiState.listSpy!(...forwarded);
      },
    };
  }
  const mockedModule = {
    __esModule: true,
    default: FakeOpenAI,
  };
  return mockedModule;
});
describe("model-utils offline resilience", () => {
  afterEach(() => {
    // Put the env var back to its pre-test value, drop cached modules and
    // clear the spy so that the tests do not influence each other.
    if (ORIGINAL_ENV_KEY === undefined) {
      delete process.env["OPENAI_API_KEY"];
    } else {
      process.env["OPENAI_API_KEY"] = ORIGINAL_ENV_KEY;
    }
    vi.resetModules();
    openAiState.listSpy = undefined;
  });

  it("returns true when API key absent (no network available)", async () => {
    delete process.env["OPENAI_API_KEY"];
    // Re-import after the env change so the module sees the new state.
    vi.resetModules();
    const modelUtils = await import("../src/utils/model-utils.js");
    const supported = await modelUtils.isModelSupportedForResponses("o4-mini");
    expect(supported).toBe(true);
  });

  it("falls back gracefully when openai.models.list throws a network error", async () => {
    process.env["OPENAI_API_KEY"] = "dummy";
    // Error shaped like a dropped connection.
    const netErr: any = Object.assign(new Error("socket hang up"), {
      code: "ECONNRESET",
    });
    openAiState.listSpy = vi.fn(async () => {
      throw netErr;
    });
    vi.resetModules();
    const modelUtils = await import("../src/utils/model-utils.js");
    // The call should still resolve to true despite the failing list().
    const supported = await modelUtils.isModelSupportedForResponses(
      "some-model",
    );
    expect(supported).toBe(true);
  });
});

View File

@@ -0,0 +1,41 @@
// Ctrl+Enter (CSIu 13;5u) should submit the buffer.
import { renderTui } from "./ui-test-helpers.js";
import MultilineTextEditor from "../src/components/chat/multiline-editor.js";
import * as React from "react";
import { describe, it, expect, vi } from "vitest";
/**
 * Writes `text` to the given stdin stream and then awaits `flush` so the
 * caller resumes only after the flush promise has settled.
 */
async function type(
  stdin: NodeJS.WritableStream,
  text: string,
  flush: () => Promise<void>,
): Promise<void> {
  stdin.write(text);
  await flush();
}
describe("MultilineTextEditor Ctrl+Enter submits", () => {
  it("calls onSubmit when CSI 13;5u is received", async () => {
    // CSI-u sequence for key code 13 with modifier 5 (Ctrl), i.e. Ctrl+Enter.
    const CTRL_ENTER = "\u001B[13;5u";
    const submitSpy = vi.fn();
    const editor = React.createElement(MultilineTextEditor, {
      height: 5,
      width: 20,
      onSubmit: submitSpy,
    });
    const { stdin, flush, cleanup } = renderTui(editor);

    await flush();
    await type(stdin, "hello", flush);
    await type(stdin, CTRL_ENTER, flush);
    await flush();

    // Exactly one submission, carrying the typed text as first argument.
    expect(submitSpy).toHaveBeenCalledTimes(1);
    const [firstCall] = submitSpy.mock.calls;
    expect(firstCall![0]).toBe("hello");
    cleanup();
  });
});

View File

@@ -0,0 +1,77 @@
// These tests exercise MultilineTextEditor behaviour when the editor width is
// *not* provided via props, so that it has to derive its width from the current
// terminal size. We emulate a terminal resize by mutating
// `process.stdout.columns` and emitting a synthetic `resize` event – the
// `useTerminalSize` hook listens for that and causes the component to
// re-render. The test then asserts that
// 1. The rendered line re-wraps to the new width, *and*
// 2. The caret (highlighted inverse character) is still kept in view after
//    the horizontal shrink so that editing remains possible.
import { renderTui } from "./ui-test-helpers.js";
import MultilineTextEditor from "../src/components/chat/multiline-editor.js";
import * as React from "react";
import { describe, it, expect } from "vitest";
/**
 * Writes `text` to stdin and awaits `flush`, so that a following
 * `lastFrame()` call is made only after the flush promise has settled.
 */
async function type(
  stdin: NodeJS.WritableStream,
  text: string,
  flush: () => Promise<void>,
): Promise<void> {
  stdin.write(text);
  await flush();
}
describe("MultilineTextEditor dynamic width", () => {
  // The dynamic horizontal scroll logic has been described as flaky. Note that
  // this test is declared with a plain `it` (not `it.fails`), so a failure
  // here does fail the run.
  it("keeps the caret visible when the terminal width shrinks", async () => {
    // `process.stdout.columns` is process-wide state. Remember the real value
    // so it can be restored even when an assertion below throws; otherwise
    // the fake width would leak into whatever runs next in this process.
    const originalColumns = process.stdout.columns;

    // Fake an initial terminal width large enough that no horizontal
    // scrolling is required while we type the long alphabet sequence.
    process.stdout.columns = 40;
    try {
      const { stdin, lastFrame, flush, cleanup } = renderTui(
        React.createElement(MultilineTextEditor, {
          initialText: "",
          // width *omitted* – component should fall back to terminal columns
          height: 3,
        }),
      );
      try {
        // Ensure the initial render completes.
        await flush();

        // Type the alphabet – longer than the width we'll shrink to.
        const alphabet = "abcdefghijklmnopqrstuvwxyz";
        await type(stdin, alphabet, flush);

        // The caret now sits after the 'z'; verify that 'z' is visible.
        expect(lastFrame()?.includes("z")).toBe(true);

        /* ----------------------- Simulate resize ----------------------- */
        // Shrink the reported terminal width so that the previously visible
        // slice would no longer include the caret *unless* the editor
        // recomputes its scroll offsets on re-render.
        process.stdout.columns = 20;
        process.stdout.emit("resize"); // notify listeners

        // Two flushes: one for the state update, one for the re-render.
        await flush();
        await flush();

        // After the resize the editor should have scrolled horizontally so
        // that the caret (and thus the 'z') remains in the rendered slice.
        const frameAfter = lastFrame() || "";
        // eslint-disable-next-line no-console
        console.log("FRAME AFTER RESIZE:\n" + frameAfter);
        expect(frameAfter.includes("z")).toBe(true);
      } finally {
        // Always unmount, even when an expectation failed.
        cleanup();
      }
    } finally {
      process.stdout.columns = originalColumns;
    }
  });
});

View File

@@ -0,0 +1,41 @@
// Plain Enter (CR) should submit.
import { renderTui } from "./ui-test-helpers.js";
import MultilineTextEditor from "../src/components/chat/multiline-editor.js";
import * as React from "react";
import { describe, it, expect, vi } from "vitest";
/**
 * Sends `text` to the stdin stream, then waits for `flush` to settle before
 * returning.
 */
async function type(
  stdin: NodeJS.WritableStream,
  text: string,
  flush: () => Promise<void>,
): Promise<void> {
  stdin.write(text);
  await flush();
}
describe("MultilineTextEditor Enter submits (CR)", () => {
  // The backslash in the title is escaped on purpose: an unescaped "\r" would
  // embed a real carriage return in the test name and garble reporter output.
  it("calls onSubmit when \\r is received", async () => {
    const onSubmit = vi.fn();
    const { stdin, flush, cleanup } = renderTui(
      React.createElement(MultilineTextEditor, {
        height: 5,
        width: 20,
        onSubmit,
      }),
    );
    try {
      await flush();
      await type(stdin, "hello", flush);
      // A bare carriage return is what plain Enter sends here.
      await type(stdin, "\r", flush);
      await flush();

      // Exactly one submission, carrying the typed text as first argument.
      expect(onSubmit).toHaveBeenCalledTimes(1);
      expect(onSubmit.mock.calls[0]![0]).toBe("hello");
    } finally {
      // Unmount even when an expectation above failed.
      cleanup();
    }
  });
});

View File

@@ -0,0 +1,64 @@
import { renderTui } from "./ui-test-helpers.js";
import MultilineTextEditor from "../src/components/chat/multiline-editor.js";
import TextBuffer from "../src/lib/text-buffer.js";
import * as React from "react";
import { describe, it, expect, vi } from "vitest";
/**
 * Writes `text` (here: single control bytes) to stdin and awaits `flush`
 * before handing control back to the test.
 */
async function type(
  stdin: NodeJS.WritableStream,
  text: string,
  flush: () => Promise<void>,
): Promise<void> {
  stdin.write(text);
  await flush();
}
describe("MultilineTextEditor external editor shortcut", () => {
  /**
   * Renders the editor, sends one control byte and asserts that
   * `TextBuffer.prototype.openInExternalEditor` was invoked exactly once.
   *
   * The prototype spy and the renderer are released in `finally` blocks so
   * that a failing expectation cannot leave the spy installed for the next
   * test in this file.
   */
  async function expectShortcutOpensEditor(controlByte: string): Promise<void> {
    const spy = vi
      .spyOn(TextBuffer.prototype as any, "openInExternalEditor")
      .mockResolvedValue(undefined);
    try {
      const { stdin, flush, cleanup } = renderTui(
        React.createElement(MultilineTextEditor, {
          initialText: "hello",
          width: 20,
          height: 3,
        }),
      );
      try {
        // Ensure initial render.
        await flush();
        // A single key press should fire the shortcut immediately.
        await type(stdin, controlByte, flush);
        expect(spy).toHaveBeenCalledTimes(1);
      } finally {
        cleanup();
      }
    } finally {
      spy.mockRestore();
    }
  }

  it("fires openInExternalEditor on CtrlE (single key)", async () => {
    await expectShortcutOpensEditor("\x05"); // Ctrl-E (ENQ / 0x05)
  });

  it("fires openInExternalEditor on CtrlX (single key)", async () => {
    await expectShortcutOpensEditor("\x18"); // Ctrl-X (CAN / 0x18)
  });
});

View File

@@ -0,0 +1,171 @@
/* --------------------------------------------------------------------------
 * Regression test – chat history navigation (↑/↓) should *only* activate
 * once the caret reaches the very first / last line of the multi-line input.
 *
 * Current buggy behaviour: TerminalChatInput intercepts the up-arrow at the
 * outer <useInput> handler regardless of the caret row, causing an immediate
 * history recall even when the user is still somewhere within a multi-line
 * draft. The test captures the *expected* behaviour (matching e.g. Bash,
 * zsh, Readline, etc.) – the ↑ key must first move the caret vertically to
 * the top-most row; only a *subsequent* press should start cycling through
 * previous messages.
 *
 * The spec was written *before* the fix and was meant to be marked as an
 * expected failure (it.todo) until the implementation is aligned; note that
 * the tests below are declared with a plain `it`.
 * ----------------------------------------------------------------------- */
import { renderTui } from "./ui-test-helpers.js";
import * as React from "react";
import { describe, it, expect, vi } from "vitest";
// ---------------------------------------------------------------------------
// Module mocks *must* be registered *before* the module under test is
// imported so that Vitest can replace the dependency during evaluation.
// ---------------------------------------------------------------------------
// The chatinput component relies on an async helper that performs filesystem
// work when images are referenced. Mock it so our unit test remains fast and
// free of sideeffects.
vi.mock("../src/utils/input-utils.js", () => {
  // Stand-in for the async helper: wraps the text in a user message item and
  // does not take the images argument at all.
  const createInputItem = vi.fn(
    async (text: string /*, images: Array<string> */) => {
      return {
        role: "user",
        type: "message",
        content: [{ type: "input_text", text }],
      };
    },
  );
  return { createInputItem };
});
// Mock the optional @lib/* dependencies so the dynamic import in parsers.ts
// does not fail in the test environment, where the alias isn't configured.
vi.mock("@lib/format-command.js", () => {
  // Joins the argv entries with single spaces.
  const formatCommandForDisplay = (cmd: Array<string>) => cmd.join(" ");
  return { formatCommandForDisplay };
});
vi.mock("@lib/approvals.js", () => {
  // Always answers `null`, whatever command is passed.
  const isSafeCommand = (_cmd: Array<string>) => null;
  return { isSafeCommand };
});
// After mocks are in place we can safely import the component under test.
import TerminalChatInput from "../src/components/chat/terminal-chat-new-input.js";
/**
 * Small keystroke helper: writes `text` to stdin, then awaits `flush` so the
 * test continues only once the flush promise has settled.
 */
async function type(
  stdin: NodeJS.WritableStream,
  text: string,
  flush: () => Promise<void>,
): Promise<void> {
  stdin.write(text);
  await flush();
}
/**
 * Produces a fresh props object for <TerminalChatInput> in which every
 * callback is either a new `vi.fn()` spy or a no-op, so the component can be
 * rendered with minimal scaffolding.
 */
function stubProps(): any {
  // Cast to any to satisfy the generic React.Dispatch signature without
  // pulling the ResponseItem type into the test bundle.
  const setItems = (() => {}) as any;
  const props = {
    isNew: true,
    loading: false,
    submitInput: vi.fn(),
    confirmationPrompt: null,
    submitConfirmation: vi.fn(),
    setLastResponseId: vi.fn(),
    setItems,
    contextLeftPercent: 100,
    openOverlay: vi.fn(),
    openModelOverlay: vi.fn(),
    openHelpOverlay: vi.fn(),
    interruptAgent: vi.fn(),
    active: true,
  };
  return props;
}
// Two keystroke-driven scenarios for <TerminalChatInput>: (a) the first ↑ on a
// two-line draft must not recall history, and (b) ↑ ↑ ↓ must bring the draft
// back. The order of the writes/flushes below is the test itself.
describe("TerminalChatInput history navigation with multiline drafts", () => {
it("should not recall history until caret is on the first line", async () => {
const { stdin, lastFrameStripped, flush, cleanup } = renderTui(
React.createElement(TerminalChatInput, stubProps()),
);
// -------------------------------------------------------------------
// 1. Submit one previous message so that history isn't empty.
// -------------------------------------------------------------------
// The message "prev" is typed one character per write/flush cycle.
for (const ch of ["p", "r", "e", "v"]) {
await type(stdin, ch, flush);
}
await type(stdin, "\r", flush); // <Enter/Return> submits the text
// Let the async onSubmit finish (mocked so it's immediate, but flush once
// more to allow state updates to propagate).
await flush();
// -------------------------------------------------------------------
// 2. Start a *multi-line* draft so that the caret ends up on row 1.
// -------------------------------------------------------------------
await type(stdin, "line1", flush);
await type(stdin, "\n", flush); // newline inside the editor (Shift+Enter)
await type(stdin, "line2", flush);
// Sanity check – both lines should be visible in the current frame.
const frameBefore = lastFrameStripped();
expect(frameBefore.includes("line1")).toBe(true);
expect(frameBefore.includes("line2")).toBe(true);
// -------------------------------------------------------------------
// 3. Press ↑ once. Expected: caret moves from (row:1) -> (row:0) but
// NO history recall yet, so the text stays unchanged.
// -------------------------------------------------------------------
await type(stdin, "\x1b[A", flush); // up-arrow
const frameAfter = lastFrameStripped();
// The buffer should be unchanged – we *haven't* entered history-navigation
// mode yet because the caret only moved vertically inside the draft.
expect(frameAfter.includes("prev")).toBe(false);
expect(frameAfter.includes("line1")).toBe(true);
cleanup();
});
it("should restore the draft when navigating forward (↓) past the newest history entry", async () => {
const { stdin, lastFrameStripped, flush, cleanup } = renderTui(
React.createElement(TerminalChatInput, stubProps()),
);
// Submit one message so we have history to recall later.
for (const ch of ["p", "r", "e", "v"]) {
await type(stdin, ch, flush);
}
await type(stdin, "\r", flush); // <Enter> – submit
await flush();
// Begin a multi-line draft that we'll want to recover later.
await type(stdin, "draft1", flush);
await type(stdin, "\n", flush); // newline inside editor
await type(stdin, "draft2", flush);
// Check that both draft lines are rendered before navigating away.
const draftFrame = lastFrameStripped();
expect(draftFrame.includes("draft1")).toBe(true);
expect(draftFrame.includes("draft2")).toBe(true);
// ────────────────────────────────────────────────────────────────────
// 1) Hit ↑ twice: first press just moves the caret to row 0, second
// enters history mode and shows the previous message ("prev").
// ────────────────────────────────────────────────────────────────────
await type(stdin, "\x1b[A", flush); // first up – vertical move only
await type(stdin, "\x1b[A", flush); // second up – recall history
const historyFrame = lastFrameStripped();
expect(historyFrame.includes("prev")).toBe(true);
// 2) Hit ↓ once – should exit history mode and restore the original draft
// (multi-line input).
await type(stdin, "\x1b[B", flush); // down-arrow
const restoredFrame = lastFrameStripped();
expect(restoredFrame.includes("draft1")).toBe(true);
expect(restoredFrame.includes("draft2")).toBe(true);
cleanup();
});
});

View File

@@ -0,0 +1,164 @@
import { renderTui } from "./ui-test-helpers.js";
import MultilineTextEditor from "../src/components/chat/multiline-editor.js";
import * as React from "react";
import { describe, it, expect, vi } from "vitest";
/**
 * Writes `text` to stdin and immediately awaits `flush`, returning once the
 * flush promise has settled.
 */
async function type(
  stdin: NodeJS.WritableStream,
  text: string,
  flush: () => Promise<void>,
): Promise<void> {
  stdin.write(text);
  await flush();
}
// Core behaviour checks for MultilineTextEditor: initial render, typing,
// submit on <Esc>, backspace handling and caret highlighting. Each case
// renders its own editor via renderTui and unmounts it with cleanup().
describe("MultilineTextEditor", () => {
it("renders the initial text", async () => {
const { lastFrame, cleanup, waitUntilExit } = renderTui(
React.createElement(MultilineTextEditor, {
initialText: "hello",
width: 10,
height: 3,
}),
);
await waitUntilExit(); // initial render
expect(lastFrame()?.includes("hello")).toBe(true);
cleanup();
});
it("updates the buffer when typing and shows the change", async () => {
// `waitUntilExit` is destructured as `_` and not used in this case.
const {
stdin,
lastFrame,
cleanup,
waitUntilExit: _,
flush,
} = renderTui(
React.createElement(MultilineTextEditor, {
initialText: "",
width: 10,
height: 3,
}),
);
// Type "h"
await type(stdin, "h", flush);
expect(lastFrame()?.includes("h")).toBe(true);
// Type "i"
await type(stdin, "i", flush);
expect(lastFrame()?.includes("hi")).toBe(true);
cleanup();
});
it("calls onSubmit with the current text on <Esc>", async () => {
const onSubmit = vi.fn();
const { stdin, flush, cleanup } = renderTui(
React.createElement(MultilineTextEditor, {
initialText: "foo",
width: 10,
height: 3,
onSubmit,
}),
);
// Press Escape (a lone ESC byte, 0x1b)
await type(stdin, "\x1b", flush);
// The unmodified initial text is expected as the single argument.
expect(onSubmit).toHaveBeenCalledTimes(1);
expect(onSubmit).toHaveBeenCalledWith("foo");
cleanup();
});
it("updates text when backspacing", async () => {
const { stdin, lastFrameStripped, flush, cleanup, waitUntilExit } =
renderTui(
React.createElement(MultilineTextEditor, {
initialText: "",
width: 10,
height: 3,
}),
);
await waitUntilExit();
// Type "hello"
stdin.write("hello");
await flush();
expect(lastFrameStripped().includes("hello")).toBe(true);
// Send 2× backspace (DEL / 0x7f), both in a single write
stdin.write("\x7f\x7f");
await flush();
const frame = lastFrameStripped();
// "hel" must remain while the longer "hell" must be gone.
expect(frame.includes("hel")).toBe(true);
expect(frame.includes("hell")).toBe(false);
cleanup();
});
it("three consecutive backspaces after typing 'hello' leaves 'he'", async () => {
const { stdin, lastFrameStripped, flush, cleanup, waitUntilExit } =
renderTui(
React.createElement(MultilineTextEditor, {
initialText: "",
width: 10,
height: 3,
}),
);
await waitUntilExit();
stdin.write("hello");
await flush();
// 3 backspaces, sent as one write
stdin.write("\x7f\x7f\x7f");
await flush();
const frame = lastFrameStripped();
// Only the "he" prefix may survive.
expect(frame.includes("he")).toBe(true);
expect(frame.includes("hel")).toBe(false);
expect(frame.includes("hello")).toBe(false);
cleanup();
});
/* -------------------------------------------------------------- */
/* Caret highlighting semantics */
/* -------------------------------------------------------------- */
it("highlights the character *under* the caret (after arrow moves)", async () => {
const { stdin, lastFrame, flush, cleanup, waitUntilExit } = renderTui(
React.createElement(MultilineTextEditor, {
initialText: "",
width: 10,
height: 3,
}),
);
await waitUntilExit();
// Type "bar" and move caret left twice. Note that "bar" and the first
// left-arrow are written back-to-back, before the first flush.
stdin.write("bar");
stdin.write("\x1b[D");
await flush();
stdin.write("\x1b[D");
await flush(); // ensure each arrow processed
// Use the raw (unstripped) frame because the assertion needs the ANSI codes.
const frameRaw = lastFrame() || "";
// eslint-disable-next-line no-console
console.log("DEBUG frame:", frameRaw);
// Capture the single character wrapped in SGR 7 (inverse on) … SGR 27
// (inverse off).
const highlightedMatch = frameRaw.match(/\x1b\[7m(.)\x1b\[27m/);
expect(highlightedMatch).not.toBeNull();
const highlightedChar = highlightedMatch ? highlightedMatch[1] : null;
expect(highlightedChar).toBe("a"); // caret should block-highlight 'a'
cleanup();
});
});

View File

@@ -0,0 +1,56 @@
import { renderTui } from "./ui-test-helpers.js";
import MultilineTextEditor from "../src/components/chat/multiline-editor.js";
import * as React from "react";
import { describe, it, expect } from "vitest";
/**
 * Sends keystrokes by writing `text` to stdin, then awaits `flush` so the
 * next frame read happens after the flush promise has settled.
 */
async function type(
  stdin: NodeJS.WritableStream,
  text: string,
  flush: () => Promise<void>,
): Promise<void> {
  stdin.write(text);
  await flush();
}
describe("MultilineTextEditor inserting new lines", () => {
  // An earlier note described this case as an expected failure relative to
  // the Rust reference; it is declared as a regular `it` here.
  it("splits the line and renders the new row when <Enter> is pressed", async () => {
    const editor = React.createElement(MultilineTextEditor, {
      height: 5,
      width: 20,
      initialText: "",
    });
    const { stdin, lastFrameStripped, flush, cleanup } = renderTui(editor);

    // Wait for the first render.
    await flush();

    // Type "hello", send a line feed, then type "world" – one write/flush
    // cycle per chunk.
    for (const chunk of ["hello", "\n", "world"]) {
      await type(stdin, chunk, flush);
    }

    const frame = lastFrameStripped();
    const rows = frame.split("\n");
    // eslint-disable-next-line no-console
    console.log(
      "\n--- RENDERED FRAME ---\n" + frame + "\n---------------------",
    );

    // At least two rendered rows are expected, with each word showing up on
    // some row of the frame.
    expect(rows.length).toBeGreaterThanOrEqual(2);
    const someRowHas = (needle: string) =>
      rows.some((row: string) => row.includes(needle));
    expect(someRowHas("hello")).toBe(true);
    expect(someRowHas("world")).toBe(true);
    cleanup();
  });
});

View File

@@ -0,0 +1,51 @@
// Regression test: Some terminals emit a carriage-return ("\r") for
// Shift+Enter instead of a bare line-feed. Pressing Shift+Enter in that
// environment should insert a newline **without** triggering submission.
import { renderTui } from "./ui-test-helpers.js";
import MultilineTextEditor from "../src/components/chat/multiline-editor.js";
import * as React from "react";
import { describe, it, expect, vi } from "vitest";
/**
 * Writes `text` to the stdin stream and waits for `flush` to settle before
 * returning to the caller.
 */
async function type(
  stdin: NodeJS.WritableStream,
  text: string,
  flush: () => Promise<void>,
): Promise<void> {
  stdin.write(text);
  await flush();
}
// The backslashes in both titles are escaped on purpose: an unescaped "\r"
// would embed a real carriage return in the suite/test name and garble the
// reporter output.
describe("MultilineTextEditor Shift+Enter (\\r variant)", () => {
  it("inserts a newline and does NOT submit when the terminal sends \\r for Shift+Enter", async () => {
    const onSubmit = vi.fn();
    const { stdin, lastFrameStripped, flush, cleanup } = renderTui(
      React.createElement(MultilineTextEditor, {
        height: 5,
        width: 20,
        initialText: "",
        onSubmit,
      }),
    );
    await flush();

    // Type some text, then press Shift+Enter, simulated by the kitty CSI-u
    // sequence for key code 13 (carriage return) with modifier 2 (Shift).
    await type(stdin, "foo", flush);
    await type(stdin, "\u001B[13;2u", flush); // ESC [ 13 ; 2 u
    await type(stdin, "bar", flush);

    const frame = lastFrameStripped();
    expect(frame).toMatch(/foo/);
    expect(frame).toMatch(/bar/);
    // Must have inserted a newline (two rendered lines inside the frame).
    expect(frame.split("\n").length).toBeGreaterThanOrEqual(2);
    // No submission should have occurred.
    expect(onSubmit).not.toHaveBeenCalled();
    cleanup();
  });
});

View File

@@ -0,0 +1,49 @@
import { renderTui } from "./ui-test-helpers.js";
import MultilineTextEditor from "../src/components/chat/multiline-editor.js";
import * as React from "react";
import { describe, it, expect, vi } from "vitest";
/**
 * Pushes `text` into stdin and then awaits `flush`, so the caller resumes
 * only after the flush promise has settled.
 */
async function type(
  stdin: NodeJS.WritableStream,
  text: string,
  flush: () => Promise<void>,
): Promise<void> {
  stdin.write(text);
  await flush();
}
describe("MultilineTextEditor Shift+Enter", () => {
  it("inserts a newline instead of submitting", async () => {
    const submitSpy = vi.fn();
    const editor = React.createElement(MultilineTextEditor, {
      height: 5,
      width: 20,
      initialText: "",
      onSubmit: submitSpy,
    });
    const { stdin, lastFrameStripped, flush, cleanup } = renderTui(editor);
    await flush();

    // Type 'hi', then Shift+Enter, then 'there'. Ink's test stdin delivers
    // raw bytes only, so Shift+Enter is approximated by writing "\n"
    // directly (i.e. a line feed without key.return).
    for (const chunk of ["hi", "\n", "there"]) {
      await type(stdin, chunk, flush);
    }

    const output = lastFrameStripped();
    expect(output).toMatch(/hi/);
    expect(output).toMatch(/there/);
    // Shift+Enter must not trigger submission.
    expect(submitSpy).not.toHaveBeenCalled();
    cleanup();
  });
});

View File

@@ -0,0 +1,57 @@
import { loadConfig, PROJECT_DOC_MAX_BYTES } from "../src/utils/config.js";
import { mkdirSync, rmSync, writeFileSync, mkdtempSync } from "fs";
import { tmpdir } from "os";
import { join } from "path";
import { describe, expect, test, beforeEach, afterEach, vi } from "vitest";
// Per-test fixture paths, (re)assigned in beforeEach.
let projectDir: string;
let configPath: string;
let instructionsPath: string;

beforeEach(() => {
  // Fresh temp directory per test, with a fake .git dir marking it as the
  // project root.
  const dirPrefix = join(tmpdir(), "codex-proj-");
  projectDir = mkdtempSync(dirPrefix);
  mkdirSync(join(projectDir, ".git"));

  // Keep config & instructions paths under the temp dir so the real home
  // directory is not touched. Neither file is created here.
  configPath = join(projectDir, "config.json");
  instructionsPath = join(projectDir, "instructions.md");
});

afterEach(() => {
  // Remove the temp tree; `force` keeps this quiet if it is already gone.
  const removalOptions = { recursive: true, force: true };
  rmSync(projectDir, removalOptions);
});
describe("project doc integration", () => {
  test("happy path: project doc gets merged into instructions", () => {
    const docContent = "# Project\nThis is my project.";
    writeFileSync(join(projectDir, "codex.md"), docContent);
    const cfg = loadConfig(configPath, instructionsPath, { cwd: projectDir });
    expect(cfg.instructions).toContain(docContent);
  });

  test("opt-out via flag prevents inclusion", () => {
    const docContent = "will be ignored";
    writeFileSync(join(projectDir, "codex.md"), docContent);
    const cfg = loadConfig(configPath, instructionsPath, {
      cwd: projectDir,
      disableProjectDoc: true,
    });
    expect(cfg.instructions).not.toContain(docContent);
  });

  test("file larger than limit gets truncated and warns", () => {
    // 4 KiB over the limit so truncation has to kick in.
    const big = "x".repeat(PROJECT_DOC_MAX_BYTES + 4096);
    writeFileSync(join(projectDir, "codex.md"), big);

    // Silence console.warn while still recording calls. The spy is restored
    // in `finally` so a failing expectation cannot leave console.warn mocked.
    const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {});
    try {
      const cfg = loadConfig(configPath, instructionsPath, { cwd: projectDir });
      // The doc is pure ASCII, so string length equals byte length here.
      // NOTE(review): this equality presumably relies on no instructions
      // file existing in the temp dir – confirm against loadConfig.
      expect(cfg.instructions.length).toBe(PROJECT_DOC_MAX_BYTES);
      expect(warnSpy).toHaveBeenCalledOnce();
    } finally {
      warnSpy.mockRestore();
    }
  });
});

View File

@@ -0,0 +1,64 @@
import { describe, it, expect } from "vitest";
import { exec as rawExec } from "../src/utils/agent/sandbox/raw-exec.js";
// Regression test: When cancelling an in-flight `rawExec()` the implementation
// must terminate *all* processes that belong to the spawned command – not just
// the direct child. The original logic only sent `SIGTERM` to the immediate
// child which meant that grandchildren (for instance when running through a
// `bash -c` wrapper) were left running and turned into "zombie" processes.
// Strategy:
// 1. Start a Bash shell that spawns a long-running `sleep`, prints the PID
//    of that `sleep`, and then waits on it. This guarantees we can later
//    check if the grandchild is still alive.
// 2. Abort the exec almost immediately.
// 3. After `rawExec()` resolves we probe the previously printed PID with
//    `process.kill(pid, 0)`. If the call throws `ESRCH` the process no
//    longer exists – the desired outcome. Otherwise the test fails.
// The negative-PID process-group trick employed by the fixed implementation is
// POSIX-only. On Windows we skip the test.
describe("rawExec abort kills entire process group", () => {
  it("terminates grandchildren spawned via bash", async () => {
    // This scenario is only exercised on POSIX platforms.
    if (process.platform === "win32") {
      return;
    }

    /**
     * Reports whether `pid` can still be signalled. Signal 0 delivers nothing
     * and only performs the existence check; any thrown error is treated as
     * "not alive".
     */
    const isAlive = (pid: number): boolean => {
      try {
        process.kill(pid, 0);
        return true;
      } catch {
        return false;
      }
    };

    const abortController = new AbortController();
    // Bash script: spawn `sleep 30` in background, print its PID, then wait.
    const script = "sleep 30 & pid=$!; echo $pid; wait $pid";
    const cmd = ["bash", "-c", script];

    // Kick off the command.
    const execPromise = rawExec(cmd, {}, [], abortController.signal);
    // Give Bash a tiny bit of time to start and print the PID.
    await new Promise((r) => setTimeout(r, 50));
    // Cancel the task – this should kill *both* bash and the inner sleep.
    abortController.abort();
    const { exitCode, stdout } = await execPromise;

    // We expect a non-zero exit code because the process was killed.
    expect(exitCode).not.toBe(0);

    // Extract the grandchild PID from stdout.
    const pidMatch = /^(\d+)/.exec(stdout.trim());
    expect(pidMatch).not.toBeNull();
    const sleepPid = Number(pidMatch![1]);

    // Signal delivery and process teardown are asynchronous, so the
    // grandchild may still be visible for a moment after rawExec() resolves.
    // Poll briefly (at most ~1s) instead of probing exactly once.
    let alive = isAlive(sleepPid);
    for (let attempt = 0; alive && attempt < 20; attempt += 1) {
      await new Promise((r) => setTimeout(r, 50));
      alive = isAlive(sleepPid);
    }
    expect(alive).toBe(false);
  });
});

View File

@@ -0,0 +1,59 @@
import { renderTui } from "./ui-test-helpers.js";
import TerminalChatResponseItem from "../src/components/chat/terminal-chat-response-item.js";
import React from "react";
import { describe, it, expect } from "vitest";
// Component under test
// The ResponseItem type is complex and imported from the OpenAI SDK. To keep
// this test lightweight we construct the minimal runtime objects we need and
// cast them to `any` so that TypeScript is satisfied.
/**
 * Builds a minimal user message item carrying a single `input_text` part.
 * The result is cast to `any` so it can stand in for the SDK's ResponseItem.
 */
function userMessage(text: string) {
  const content = [{ type: "input_text", text }];
  const item = { type: "message", role: "user", content };
  return item as any;
}
/**
 * Builds a minimal assistant message item carrying a single `output_text`
 * part. The result is cast to `any` so it can stand in for the SDK's
 * ResponseItem.
 */
function assistantMessage(text: string) {
  const content = [{ type: "output_text", text }];
  const item = { type: "message", role: "assistant", content };
  return item as any;
}
describe("TerminalChatResponseItem", () => {
  it("renders a user message", () => {
    const element = (
      <TerminalChatResponseItem item={userMessage("Hello world")} />
    );
    const { lastFrameStripped } = renderTui(element);
    const output = lastFrameStripped();
    // Both the role label and the message body must be present.
    for (const fragment of ["user", "Hello world"]) {
      expect(output).toContain(fragment);
    }
  });

  it("renders an assistant message", () => {
    const element = (
      <TerminalChatResponseItem item={assistantMessage("Sure thing")} />
    );
    const { lastFrameStripped } = renderTui(element);
    const output = lastFrameStripped();
    // Assistant messages are labelled "codex" in the UI; the label is
    // matched case-insensitively, the body verbatim.
    expect(output.toLowerCase()).toContain("codex");
    expect(output).toContain("Sure thing");
  });
});

View File

@@ -0,0 +1,50 @@
import TextBuffer from "../src/lib/text-buffer.js";
import { describe, it, expect } from "vitest";
// These tests ensure that the TextBuffer copy & paste logic keeps parity with
// the Rust reference implementation (`textarea.rs`). When a multi-line
// string *without* a trailing newline is pasted at the beginning of a line,
// the final pasted line should be merged with the text that originally
// followed the caret – exactly how most editors behave.
/** Creates a fresh three-line buffer ("ab" / "cd" / "ef"). */
function setupBuffer(): TextBuffer {
  const initialText = ["ab", "cd", "ef"].join("\n");
  return new TextBuffer(initialText);
}
describe("TextBuffer copy/paste multiline", () => {
  /**
   * Selects from the start of the buffer to (1,2), i.e. the lines "ab" and
   * "cd" without a trailing newline. Assumes the caret of a new buffer starts
   * at (0,0), as the per-step comments below state.
   */
  function selectFirstTwoLines(buf: TextBuffer): void {
    buf.startSelection(); // anchor at 0,0
    buf.move("down"); // 1,0
    buf.move("right");
    buf.move("right"); // 1,2
  }

  it("copies a multiline selection without the trailing newline", () => {
    const buf = setupBuffer();
    selectFirstTwoLines(buf);
    const copied = buf.copy();
    expect(copied).toBe("ab\ncd");
  });

  // The title states what the assertion actually checks: the last pasted line
  // is merged with the text on the insertion row.
  it("pastes the multiline clipboard and merges its final line with the following text", () => {
    const buf = setupBuffer();
    // Make the same selection and copy it.
    selectFirstTwoLines(buf);
    buf.copy();

    // Move the caret to the start of the last line and paste.
    buf.move("down");
    buf.move("home"); // (2,0)
    const ok = buf.paste();
    expect(ok).toBe(true);

    // Desired final buffer: "ab" becomes a new row and "cd" is merged with
    // the original "ef" on the insertion row, matching the Rust reference.
    expect(buf.getLines()).toEqual(["ab", "cd", "ab", "cdef"]);
  });
});

View File

@@ -0,0 +1,14 @@
import TextBuffer from "../src/lib/text-buffer.js";
import { describe, it, expect } from "vitest";
describe("TextBuffer newline normalisation", () => {
  // The backslashes in the title are escaped on purpose: unescaped "\r" and
  // "\r\n" would embed real control characters in the test name.
  it("insertStr should split on \\r and \\r\\n sequences", () => {
    const buf = new TextBuffer("");
    // Windows-style CRLF input; only this variant is exercised here.
    buf.insertStr("ab\r\ncd\r\nef");
    // Three separate lines, with no stray "\r" left on any of them.
    expect(buf.getLines()).toEqual(["ab", "cd", "ef"]);
    expect(buf.getCursor()).toEqual([2, 2]); // after 'f'
  });
});

View File

@@ -0,0 +1,250 @@
import TextBuffer from "../src/lib/text-buffer";
import { describe, it, expect } from "vitest";
// The purpose of this test-suite is NOT to make the implementation green today -
// quite the opposite. We capture behaviours that are already covered by the
// reference Rust implementation (textarea.rs) but are *still missing* from the
// current TypeScript port. Every test is therefore marked with `.fails()` so
// that the suite passes while the functionality is absent. When a particular
// gap is closed the corresponding test will begin to succeed, causing Vitest to
// raise an error (a *good* error) that reminds us to remove the `.fails` flag.
/* -------------------------------------------------------------------------- */
/* Softtab insertion */
/* -------------------------------------------------------------------------- */
describe("softtab insertion (↹ => 4 spaces)", () => {
  it.fails(
    "inserts 4 spaces at caret position when hardtab mode is off",
    () => {
      const buf = new TextBuffer("");

      // A literal "\t" character is treated as user pressing the Tab key. The
      // Rust version expands it to soft-tabs by default.
      buf.insert("\t");

      // The expected text must be exactly four spaces to agree with the test
      // title and the cursor column asserted below. It is spelled with
      // repeat() so the width is explicit and survives whitespace-collapsing
      // tools; with a shorter literal this `.fails` test could never flip to
      // passing once soft-tabs are implemented.
      expect(buf.getText()).toBe(" ".repeat(4));
      expect(buf.getCursor()).toEqual([0, 4]);
    },
  );
});
/* -------------------------------------------------------------------------- */
/* Undo / Redo grouping & stack clearing */
/* -------------------------------------------------------------------------- */
describe("undo / redo advanced behaviour", () => {
  it.fails(
    "typing a word characterbycharacter should undo in one step",
    () => {
      const buffer = new TextBuffer("");

      // Feed the word one character at a time, the way interactive typing does.
      Array.from("hello").forEach((letter) => {
        buffer.insert(letter);
      });

      // A single undo is expected to revert the *whole* word and leave the
      // buffer empty with the caret back at the origin.
      buffer.undo();

      expect(buffer.getText()).toBe("");
      expect(buffer.getCursor()).toEqual([0, 0]);
    },
  );
});
/* -------------------------------------------------------------------------- */
/* Selection cut / delete selection */
/* -------------------------------------------------------------------------- */
// Pending-feature test for cutting a selection. It is marked `.fails` because
// `cut()` and `getClipboard()` are not available on TextBuffer yet (hence the
// `@ts-expect-error` directives, which must stay directly above those calls).
describe("selection cut/delete", () => {
it.fails(
"cut() removes the selected range and yanks it into clipboard",
() => {
const buf = new TextBuffer("foo bar baz");
// Select the middle word "bar"
buf.move("wordRight"); // after "foo" + space => col 4
buf.startSelection();
buf.move("wordRight"); // after "bar" (col 8)
// NOTE(review): in "foo bar baz" col 7 is right after "bar" and col 8 is
// after the following space; if the selection really ends at col 8 the cut
// text would be "bar " rather than "bar" - confirm the intended end column.
// @ts-expect-error method missing in current implementation
buf.cut();
// Text should now read "foo baz" (two spaces collapsed only if impl trims)
expect(buf.getText()).toBe("foo baz");
// Cursor should be at the start of the gap where text was removed
expect(buf.getCursor()).toEqual([0, 4]);
// And clipboard/yank buffer should contain the deleted word
// @ts-expect-error clipboard getter not exposed yet
expect(buf.getClipboard()).toBe("bar");
},
);
});
/* -------------------------------------------------------------------------- */
/* Wordwise forward deletion (Ctrl+Delete) */
/* -------------------------------------------------------------------------- */
describe("delete_next_word (Ctrl+Delete)", () => {
  it.fails("removes everything until the next word boundary", () => {
    const buffer = new TextBuffer("hello world!! next");
    const viewport = { width: 80, height: 25 };

    // The caret is left at the start of the line (0,0). One Ctrl+Delete is
    // expected to wipe the word "hello" together with the following space.
    buffer.handleInput(undefined, { delete: true, ctrl: true }, viewport);

    const remainingText = buffer.getText();
    expect(remainingText).toBe("world!! next");
    expect(buffer.getCursor()).toEqual([0, 0]);
  });
});
/* -------------------------------------------------------------------------- */
/* Configurable tab length */
/* -------------------------------------------------------------------------- */
describe("tab length configuration", () => {
  it.fails("inserts the configured number of spaces when tabLen=2", () => {
    // @ts-expect-error constructor currently has no config object
    const buf = new TextBuffer("", { tabLen: 2 });

    buf.insert("\t");

    // Exactly `tabLen` (2) spaces are expected, matching the cursor column
    // asserted below. repeat() keeps the width explicit; a shorter literal
    // would keep this `.fails` test failing even after the feature lands.
    expect(buf.getText()).toBe(" ".repeat(2)); // two spaces
    expect(buf.getCursor()).toEqual([0, 2]);
  });
});
/* -------------------------------------------------------------------------- */
/* Search subsystem */
/* -------------------------------------------------------------------------- */
describe("search / regex navigation", () => {
  it.fails("search_forward jumps to the next match", () => {
    const rows = [
      "alpha beta gamma",
      "beta gamma alpha",
      "gamma alpha beta",
    ];
    const buffer = new TextBuffer(rows.join("\n"));

    // @ts-expect-error method missing
    buffer.setSearchPattern(/beta/);

    // The cursor starts at 0,0, so the first forward search is expected to
    // land on the first occurrence (row 0, col 6).
    // @ts-expect-error method missing
    buffer.searchForward();
    expect(buffer.getCursor()).toEqual([0, 6]);

    // A second forward search is expected to move on to the next occurrence
    // (row 1, col 0).
    // @ts-expect-error method missing
    buffer.searchForward();
    expect(buffer.getCursor()).toEqual([1, 0]);
  });
});
/* -------------------------------------------------------------------------- */
/* Wordwise navigation accuracy */
/* -------------------------------------------------------------------------- */
describe("wordLeft / wordRight punctuation boundaries", () => {
  it.fails("wordLeft stops after punctuation like hyphen (-)", () => {
    const buffer = new TextBuffer("hello-world");

    // Begin at the end of the line, then jump one word to the left. In the
    // Rust implementation this lands right *after* the hyphen, i.e. between
    // '-' and 'w' (column index 6).
    buffer.move("end");
    buffer.move("wordLeft");

    expect(buffer.getCursor()).toEqual([0, 6]);
  });

  it.fails(
    "wordRight stops after punctuation like underscore (_) which is not in JS boundary set",
    () => {
      const buffer = new TextBuffer("foo_bar");

      // Starting at column 0, a single wordRight is expected to stop right
      // after the underscore (col 4).
      buffer.move("wordRight");

      expect(buffer.getCursor()).toEqual([0, 4]);
    },
  );
});
/* -------------------------------------------------------------------------- */
/* Wordwise deletion (Ctrl+Backspace) */
/* -------------------------------------------------------------------------- */
describe("word deletion shortcuts", () => {
  it.fails("Ctrl+Backspace deletes the previous word", () => {
    const buffer = new TextBuffer("hello world");
    const viewport = { width: 80, height: 25 };

    // Put the caret after the last character.
    buffer.move("end");

    // Simulate Ctrl+Backspace (terminals usually send backspace with the
    // ctrl flag set).
    buffer.handleInput(undefined, { backspace: true, ctrl: true }, viewport);

    // The whole word "world" *and* the preceding space should disappear,
    // leaving just "hello" with the caret right behind it.
    expect(buffer.getText()).toBe("hello");
    expect(buffer.getCursor()).toEqual([0, 5]);
  });
});
/* -------------------------------------------------------------------------- */
/* Paragraph navigation */
/* -------------------------------------------------------------------------- */
describe("paragraph navigation", () => {
  it.fails("Jumping forward by paragraph stops after a blank line", () => {
    const paragraphs = [
      "first paragraph line 1",
      "first paragraph line 2",
      "", // blank line separates paragraphs
      "second paragraph line 1",
    ];
    const buffer = new TextBuffer(paragraphs.join("\n"));

    // The caret starts at the very beginning. No paragraph command is exposed
    // yet, so the call below imitates the intended API and keeps this test
    // failing until the command exists.
    // @ts-expect-error method not implemented yet
    buffer.move("paragraphForward");

    // The caret should land at the start of the first line _after_ the blank one.
    expect(buffer.getCursor()).toEqual([3, 0]);
  });
});
/* -------------------------------------------------------------------------- */
/* Independent scrolling */
/* -------------------------------------------------------------------------- */
describe("viewport scrolling independent of cursor", () => {
  it.fails("scrolls without moving the caret", () => {
    // Build 100 numbered lines: "line 0" … "line 99".
    const rows: Array<string> = [];
    for (let index = 0; index < 100; index += 1) {
      rows.push(`line ${index}`);
    }
    const buffer = new TextBuffer(rows.join("\n"));
    const viewport = { width: 10, height: 5 };

    // The cursor stays at 0,0 while the view is asked to scroll down one page.
    // @ts-expect-error method not implemented yet
    buffer.scroll("pageDown", viewport);

    // The cursor must remain at (0,0) even though the viewport origin changed.
    expect(buffer.getCursor()).toEqual([0, 0]);

    // The first visible line should now be "line 5".
    const firstVisible = buffer.getVisibleLines(viewport)[0];
    expect(firstVisible).toBe("line 5");
  });
});

View File

@@ -0,0 +1,115 @@
import TextBuffer from "../src/lib/text-buffer.js";
import { describe, test, expect } from "vitest";
describe("TextBuffer wordwise navigation & deletion", () => {
test("wordRight moves to endofline when no further boundary", () => {
  const buffer = new TextBuffer("hello");

  // Step the caret into the middle of the word (index 3).
  for (let step = 0; step < 3; step += 1) {
    buffer.move("right");
  }

  buffer.move("wordRight");

  const column = buffer.getCursor()[1];
  expect(column).toBe(5); // end of the word / line
});
test("Ctrl+Backspace on raw byte deletes previous word", () => {
  const viewport = { height: 10, width: 80 } as const;
  const buffer = new TextBuffer("hello world");

  // Start with the caret at the end of the line.
  buffer.move("end");

  // Feed the raw DEL byte (0x7f) together with the ctrl modifier - Ink
  // usually does *not* set `key.backspace` on this path.
  buffer.handleInput("\x7f", { ctrl: true }, viewport);

  const remaining = buffer.getText();
  expect(remaining).toBe("hello ");
});
test("Option/Alt+Backspace deletes previous word", () => {
  const viewport = { height: 10, width: 80 } as const;
  const buffer = new TextBuffer("foo bar baz");

  // Caret goes to the end of the line first.
  buffer.move("end");

  // Option+Backspace (alt): Ink sets key.backspace = true and key.alt = true,
  // without a raw input byte.
  buffer.handleInput(undefined, { backspace: true, alt: true }, viewport);

  const remaining = buffer.getText();
  expect(remaining).toBe("foo bar ");
});
test("Option/Alt+Delete deletes next word", () => {
  const tb = new TextBuffer("foo bar baz");
  const vp = { height: 10, width: 80 } as const;

  // Move caret between first and second word (after space)
  tb.move("wordRight"); // after foo
  tb.move("right"); // skip space -> start of bar

  // Option+Delete
  tb.handleInput(undefined, { delete: true, alt: true }, vp);

  // Only the word "bar" is removed, so the spaces that surrounded it both
  // remain: "foo" + " " + " baz". The expectation is built from its parts so
  // the double space is unmistakable.
  // NOTE(review): assumes word deletion keeps the trailing space, as the
  // sibling deleteWordRight test suggests - confirm against the implementation.
  expect(tb.getText()).toBe(["foo", " ", " ", "baz"].join("")); // note double space - removed later maybe
});
test("wordLeft eventually reaches column 0", () => {
  const buffer = new TextBuffer("hello world");

  // Begin at the end of the line.
  buffer.move("end");

  // Two word-wise jumps to the left should bring the caret to the line start.
  for (let jump = 0; jump < 2; jump += 1) {
    buffer.move("wordLeft");
  }

  const column = buffer.getCursor()[1];
  expect(column).toBe(0);
});
test("wordRight jumps over a delimiter into the next word", () => {
  const buffer = new TextBuffer("hello world");

  // Performs one wordRight and reports the resulting caret column.
  const columnAfterWordRight = () => {
    buffer.move("wordRight");
    return buffer.getCursor()[1];
  };

  // First jump from the start: the caret is expected at col 5, right after "hello".
  expect(columnAfterWordRight()).toBe(5);

  // Second jump: end of line (after "world").
  expect(columnAfterWordRight()).toBe(11);
});
test("deleteWordLeft removes the previous word and positions the caret correctly", () => {
  const buffer = new TextBuffer("hello world");

  // Arrange: caret at the end of the line.
  buffer.move("end");

  // Act
  buffer.deleteWordLeft();

  // Assert: "world" is gone and the caret sits right after the space.
  expect(buffer.getText()).toBe("hello ");
  const column = buffer.getCursor()[1];
  expect(column).toBe(6); // after the space
});
test("deleteWordRight removes the following word", () => {
  const buffer = new TextBuffer("hello world");

  // Arrange: bring the caret to the start of "world".
  buffer.move("wordRight"); // caret after "hello"
  buffer.move("right"); // skip the space, now at index 6 (start of world)

  // Act
  buffer.deleteWordRight();

  // Assert: the word is removed and the caret has not moved.
  expect(buffer.getText()).toBe("hello ");
  const column = buffer.getCursor()[1];
  expect(column).toBe(6);
});
});

View File

@@ -0,0 +1,264 @@
import TextBuffer from "../src/lib/text-buffer";
import { describe, it, expect } from "vitest";
describe("TextBuffer basic editing parity with Rust suite", () => {
/* ------------------------------------------------------------------ */
/* insert_char */
/* ------------------------------------------------------------------ */
it("insert_char / printable (single line)", () => {
  // Each row: [target column, character to insert, expected line afterwards]
  const table: Array<[number, string, string]> = [
    [0, "x", "xab"],
    [1, "x", "axb"],
    [2, "x", "abx"],
    [1, "あ", "aあb"],
  ];

  table.forEach(([targetCol, inserted, expectedLine]) => {
    const buffer = new TextBuffer("ab");

    // Start from the end of the line (col 2) and walk left to the target.
    buffer.move("end");
    while (buffer.getCursor()[1] > targetCol) {
      buffer.move("left");
    }

    buffer.insert(inserted);

    expect(buffer.getText()).toBe(expectedLine);
    expect(buffer.getCursor()).toEqual([0, targetCol + 1]);
  });
});
/* ------------------------------------------------------------------ */
/* insert_char newline support */
/* ------------------------------------------------------------------ */
it("insert_char with a newline should split the line", () => {
  const buffer = new TextBuffer("ab");

  // Go to the end of the first (and only) line, then insert a raw "\n".
  // The Rust implementation splits the line at that point.
  buffer.move("end");
  buffer.insert("\n");

  // The text should now consist of two separate lines, with the caret at the
  // start of the new (empty) one.
  const linesAfter = buffer.getLines();
  expect(linesAfter).toEqual(["ab", ""]);
  expect(buffer.getCursor()).toEqual([1, 0]);
});
/* ------------------------------------------------------------------ */
/* insert_str helpers */
/* ------------------------------------------------------------------ */
it("insert_str should insert multiline strings", () => {
  const buffer = new TextBuffer(["ab", "cd", "ef"].join("\n"));

  // No caret movement is needed: the cursor starts at (row 0, col 0).
  buffer.insertStr("x\ny");

  // "x" becomes its own line and "y" is prepended to the former first line.
  expect(buffer.getLines()).toEqual(["x", "yab", "cd", "ef"]);
  expect(buffer.getCursor()).toEqual([1, 1]);
});
/* ------------------------------------------------------------------ */
/* Undo / Redo */
/* ------------------------------------------------------------------ */
it("undo / redo history should revert edits", () => {
  const buffer = new TextBuffer("hello");

  // Append "!" at the end of the line: the text becomes "hello!".
  buffer.move("end");
  buffer.insert("!");

  // Undo must report success and restore the original text.
  const undone = buffer.undo();
  expect(undone).toBe(true);
  expect(buffer.getText()).toBe("hello");

  // Redo must report success and re-apply the edit.
  const redone = buffer.redo();
  expect(redone).toBe(true);
  expect(buffer.getText()).toBe("hello!");
});
/* ------------------------------------------------------------------ */
/* Selection model */
/* ------------------------------------------------------------------ */
it("copy & paste should operate on current selection", () => {
  const buffer = new TextBuffer("hello world");

  // Select the word "hello": anchor at the start, then step right once per
  // letter (h, e, l, l, o).
  buffer.startSelection();
  for (let step = 0; step < 5; step += 1) {
    buffer.move("right");
  }
  buffer.endSelection();
  buffer.copy();

  // Jump to the end, add one separating space, then paste the copied word.
  buffer.move("end");
  buffer.insert(" ");
  buffer.paste();

  expect(buffer.getText()).toBe("hello world hello");
});
/* ------------------------------------------------------------------ */
/* Backspace behaviour */
/* ------------------------------------------------------------------ */
describe("backspace", () => {
  it("deletes the character to the *left* of the caret within a line", () => {
    const buffer = new TextBuffer("abc");

    // Put the caret after the second character (index 2 => after 'b'):
    //   a|bc (col 1)  ->  ab|c (col 2)
    for (let step = 0; step < 2; step += 1) {
      buffer.move("right");
    }

    buffer.backspace();

    expect(buffer.getLines()).toEqual(["ac"]);
    expect(buffer.getCursor()).toEqual([0, 1]);
  });

  it("merges with the previous line when invoked at column 0", () => {
    const buffer = new TextBuffer(["ab", "cd"].join("\n"));

    // Caret to the beginning of the second line (row = 1, col = 0).
    buffer.move("down");

    buffer.backspace();

    expect(buffer.getLines()).toEqual(["abcd"]);
    expect(buffer.getCursor()).toEqual([0, 2]); // after 'b'
  });

  it("is a noop at the very beginning of the buffer", () => {
    const buffer = new TextBuffer("ab");

    // The caret starts at (0,0), so there is nothing to delete.
    buffer.backspace();

    expect(buffer.getLines()).toEqual(["ab"]);
    expect(buffer.getCursor()).toEqual([0, 0]);
  });
});
/* ------------------------------------------------------------------ */
/* Vertical cursor movement we should preserve the preferred column */
/* ------------------------------------------------------------------ */
describe("up / down navigation keeps the preferred column", () => {
  it("restores horizontal position when moving across shorter lines", () => {
    // Three lines: long / short / long
    const buffer = new TextBuffer(["abcdef", "x", "abcdefg"].join("\n"));

    // Put the caret after the 5th char of the first line (col = 5).
    buffer.move("end"); // col 6 (after 'f')
    buffer.move("left"); // col 5 (between 'e' and 'f')

    // Go down twice: through the short line (clamped to (1, 1)) and on to the
    // long one, where the original column should be restored.
    for (let row = 0; row < 2; row += 1) {
      buffer.move("down");
    }

    expect(buffer.getCursor()).toEqual([2, 5]);
  });
});
/* ------------------------------------------------------------------ */
/* Left / Right arrow navigation across Unicode surrogate pairs */
/* ------------------------------------------------------------------ */
describe("left / right navigation", () => {
  it("should treat multicodeunit emoji as a single character", () => {
    // '🐶' is a surrogate pair (length 2) but one user-perceived character.
    const buffer = new TextBuffer("🐶a");

    // One step to the right should move the caret logically past the emoji;
    // the character typed next must then land between the emoji and "a".
    buffer.move("right");
    buffer.insert("x");

    // The emoji stays intact and the text reads 🐶xa.
    const linesAfter = buffer.getLines();
    expect(linesAfter).toEqual(["🐶xa"]);

    // The cursor sits after the inserted char (two visible columns along).
    expect(buffer.getCursor()).toEqual([0, 2]);
  });
});
/* ------------------------------------------------------------------ */
/* HandleInput raw DEL bytes should map to backspace */
/* ------------------------------------------------------------------ */
// The backslash in the title is escaped on purpose: unescaped, the test name
// would contain a raw DEL (0x7f) control character, which is invisible in
// reporter output and awkward to match with name filters.
it("handleInput should treat \\x7f input as backspace", () => {
  const buf = new TextBuffer("");
  const vp = { width: 80, height: 25 };

  // Type "hello" via the printable input path.
  for (const ch of "hello") {
    buf.handleInput(ch, {}, vp);
  }

  // Two DEL bytes - the terminal's backspace.
  buf.handleInput("\x7f", {}, vp);
  buf.handleInput("\x7f", {}, vp);

  expect(buf.getText()).toBe("hel");
  expect(buf.getCursor()).toEqual([0, 3]);
});
/* ------------------------------------------------------------------ */
/* HandleInput `key.delete` should ALSO behave as backspace */
/* ------------------------------------------------------------------ */
it("handleInput should treat key.delete as backspace", () => {
  const buffer = new TextBuffer("");
  const viewport = { width: 80, height: 25 };

  // Type "hello" one character at a time.
  Array.from("hello").forEach((letter) => {
    buffer.handleInput(letter, {}, viewport);
  });

  // Simulate the Delete (Mac backspace) key three times.
  for (let press = 0; press < 3; press += 1) {
    buffer.handleInput(undefined, { delete: true }, viewport);
  }

  expect(buffer.getText()).toBe("he");
  expect(buffer.getCursor()).toEqual([0, 2]);
});
/* ------------------------------------------------------------------ */
/* Cursor positioning semantics */
/* ------------------------------------------------------------------ */
describe("cursor movement & backspace semantics", () => {
  it("typing should leave cursor after the last inserted character", () => {
    const buffer = new TextBuffer("");
    const viewport = { width: 80, height: 25 };

    buffer.handleInput("h", {}, viewport);
    expect(buffer.getCursor()).toEqual([0, 1]);

    Array.from("ello").forEach((letter) => {
      buffer.handleInput(letter, {}, viewport);
    });
    expect(buffer.getCursor()).toEqual([0, 5]); // after 'o'
  });

  it("arrowleft moves the caret to *between* characters (highlight next)", () => {
    const buffer = new TextBuffer("");
    const viewport = { width: 80, height: 25 };

    // Type "bar"; the cursor ends up at col 3.
    Array.from("bar").forEach((letter) => {
      buffer.handleInput(letter, {}, viewport);
    });

    // Two steps left: col 2 (right before 'r'), then col 1 (right before 'a').
    for (let step = 0; step < 2; step += 1) {
      buffer.move("left");
    }
    expect(buffer.getCursor()).toEqual([0, 1]);

    // The character to the RIGHT of the caret should be 'a'. The line is
    // split by code point so the caret column indexes whole characters.
    const firstLine = buffer.getLines()[0]!;
    const caretCol = buffer.getCursor()[1];
    const charRight = Array.from(firstLine)[caretCol];
    expect(charRight).toBe("a");

    // Backspace deletes the char to the *left* (i.e. 'b'), leaving "ar".
    buffer.backspace();
    expect(buffer.getLines()[0]).toBe("ar");
    expect(buffer.getCursor()).toEqual([0, 0]);
  });
});
});

View File

@@ -0,0 +1,69 @@
/*
 * Regression test - ensure that the TypeaheadOverlay passes the *complete*
 * list of items down to <SelectInput>. This guarantees that users can scroll
 * through the full set instead of being limited to the hard-coded "limit"
 * slice that is only meant to control how many rows are visible at once.
 */
import * as React from "react";
import { describe, it, expect, vi } from "vitest";
// ---------------------------------------------------------------------------
// Mock <ink-select-input> so we can capture the props that TypeaheadOverlay
// forwards without rendering the real component (which would require a full
// Ink TTY environment).
// ---------------------------------------------------------------------------
// Most recent `items` prop handed to the mocked <SelectInput>. It stays `null`
// until the stub component below has been invoked at least once.
let receivedItems: Array<{ label: string; value: string }> | null = null;
// Replace the module's default export with a stub component that records its
// `items` prop into `receivedItems` and renders nothing.
vi.mock("ink-select-input", () => {
return {
default: (props: any) => {
receivedItems = props.items;
return null; // Do not render anything - we only care about the props
},
};
});
// Ink's <TextInput> toggles raw-mode which calls .ref() / .unref() on stdin.
// The test environment's mock streams don't implement those methods, so we
// polyfill them to no-ops on the prototype *before* the component tree mounts.
import { EventEmitter } from "node:events";
// Install no-op ref() / unref() methods on EventEmitter.prototype, but only
// when no truthy `ref` is present there already.
if (!(EventEmitter.prototype as any).ref) {
  Object.assign(EventEmitter.prototype, {
    ref: () => {},
    unref: () => {},
  });
}
import type { TypeaheadItem } from "../src/components/typeahead-overlay.js";
import TypeaheadOverlay from "../src/components/typeahead-overlay.js";
import { renderTui } from "./ui-test-helpers.js";
describe("TypeaheadOverlay scrolling capability", () => {
  it("passes the full item list to <SelectInput> so users can scroll beyond the visible limit", async () => {
    // 20 items: "model-1" … "model-20", deliberately more than `limit` below.
    const ITEMS: Array<TypeaheadItem> = Array.from({ length: 20 }, (_, i) => ({
      label: `model-${i + 1}`,
      value: `model-${i + 1}`,
    }));

    // Sanity - reset the capture before rendering.
    receivedItems = null;

    const { flush, cleanup } = renderTui(
      React.createElement(TypeaheadOverlay, {
        title: "Test",
        initialItems: ITEMS,
        limit: 5, // visible rows - should *not* limit the underlying list
        onSelect: () => {},
        onExit: () => {},
      }),
    );

    // Run cleanup() in `finally` so that it still happens when an assertion
    // throws; otherwise a failing run would skip it entirely.
    try {
      await flush(); // allow first render to complete

      expect(receivedItems).not.toBeNull();
      expect((receivedItems ?? []).length).toBe(ITEMS.length);
    } finally {
      cleanup();
    }
  });
});

View File

@@ -0,0 +1,28 @@
import type React from "react";
import { render } from "ink-testing-library";
import stripAnsi from "strip-ansi";
/**
 * Render an Ink component for testing.
 *
 * The returned object contains everything the testing-library `render()` call
 * returns, plus two extras:
 *  - `lastFrameStripped()` - the latest rendered frame with ANSI escape codes
 *    removed, so that assertions can be colour-agnostic;
 *  - `flush()` - a promise that resolves after a zero-delay timer, giving
 *    pending work a chance to settle before the next frame is inspected.
 */
export function renderTui(ui: React.ReactElement): any {
  const rendered = render(ui);

  // Latest frame as plain text; a missing/empty frame becomes "".
  function lastFrameStripped() {
    const frame = rendered.lastFrame() || "";
    return stripAnsi(frame);
  }

  // A tiny helper that waits for Ink's internal promises / timers to settle
  // so the next `lastFrame()` call reflects the latest UI state.
  async function flush(): Promise<void> {
    await new Promise<void>((done) => setTimeout(done, 0));
  }

  return { ...rendered, lastFrameStripped, flush };
}