2025-04-16 12:56:08 -04:00
|
|
|
|
import { describe, it, expect, vi } from "vitest";
|
|
|
|
|
|
|
|
|
|
|
|
// --- OpenAI stream mock ----------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Minimal stand-in for the stream object handed back by the mocked
 * `responses.create()` call.
 *
 * Iterating it yields exactly two events, in order:
 *   1. `response.output_item.done` carrying a `shell` function call, and
 *   2. `response.completed` whose `output` repeats that same function call.
 */
class FakeStream {
  /** Exposes a mock `abort` function under `controller`. */
  public controller = { abort: vi.fn() };

  async *[Symbol.asyncIterator]() {
    // Produces a new copy of the shell request on every call so the two
    // events below never share an object instance.
    const buildShellCall = () => ({
      type: "function_call",
      id: "call‑terminate‑1",
      name: "shell",
      arguments: JSON.stringify({ cmd: ["sleep", "5"] }),
    });

    // Request a shell function call right away; the tests then verify that
    // the matching function_call_output is never surfaced after terminate().
    const itemDone = {
      type: "response.output_item.done",
      item: buildShellCall(),
    } as any;
    yield itemDone;

    // Close the turn, echoing the same function call in the response output.
    const turnCompleted = {
      type: "response.completed",
      response: {
        id: "resp‑terminate‑1",
        status: "completed",
        output: [buildShellCall()],
      },
    } as any;
    yield turnCompleted;
  }
}
|
|
|
|
|
|
|
|
|
|
|
|
// Swap the `openai` package for a stub: the default export is a client whose
// `responses.create()` always resolves to a FakeStream, and the named export
// `APIConnectionTimeoutError` is a bare Error subclass.
vi.mock("openai", () => {
  class APIConnectionTimeoutError extends Error {}

  class FakeOpenAI {
    public responses = {
      // FakeStream is only looked up when create() is invoked, not while the
      // mock factory itself is running.
      async create() {
        return new FakeStream();
      },
    };
  }

  return { __esModule: true, default: FakeOpenAI, APIConnectionTimeoutError };
});
|
|
|
|
|
|
|
|
|
|
|
|
// --- Helpers referenced by handle-exec-command -----------------------------
|
|
|
|
|
|
|
2025-04-16 14:16:53 -07:00
|
|
|
|
// Stub the approvals module: nothing is pre-approved, every command is
// reported as auto-approved without a sandbox, and isSafeCommand() yields null.
vi.mock("../src/approvals.js", () => ({
  __esModule: true,
  alwaysApprovedCommands: new Set<string>(),
  canAutoApprove() {
    return { type: "auto-approve", runInSandbox: false } as any;
  },
  isSafeCommand() {
    return null;
  },
}));
|
|
|
|
|
|
|
2025-04-16 14:16:53 -07:00
|
|
|
|
// Stub the command formatter: the display form is simply the argv entries
// joined with single spaces.
vi.mock("../src/format-command.js", () => ({
  __esModule: true,
  formatCommandForDisplay(cmd: Array<string>) {
    return cmd.join(" ");
  },
}));
|
|
|
|
|
|
|
|
|
|
|
|
// Stub logger to avoid filesystem side-effects
|
|
|
|
|
|
// Replace the agent logger with a no-op `log` and a logging-enabled check
// that always answers false.
vi.mock("../src/utils/agent/log.js", () => {
  const noop = () => {};
  const loggingDisabled = () => false;

  return {
    __esModule: true,
    log: noop,
    isLoggingEnabled: loggingDisabled,
  };
});
|
|
|
|
|
|
|
|
|
|
|
|
// After dependency mocks we can import the modules under test.
|
|
|
|
|
|
|
|
|
|
|
|
import { AgentLoop } from "../src/utils/agent/agent-loop.js";
|
|
|
|
|
|
import * as handleExec from "../src/utils/agent/handle-exec-command.js";
|
|
|
|
|
|
|
|
|
|
|
|
describe("Agent terminate (hard cancel)", () => {
  /** Resolves after `ms` milliseconds of wall-clock time. */
  const pause = (ms: number) => new Promise((done) => setTimeout(done, ms));

  /** Builds a one-element input list holding a user message with `text`. */
  const userMessage = (text: string) => [
    {
      type: "message",
      role: "user",
      content: [{ type: "input_text", text }],
    },
  ];

  /**
   * AgentLoop options that both tests have in common. Each test adds its own
   * `onItem` callback on top of these.
   */
  const sharedAgentOptions = () => ({
    model: "any",
    instructions: "",
    config: { model: "any", instructions: "", notify: false },
    approvalPolicy: { mode: "auto" } as any,
    additionalWritableRoots: [],
    onLoading: () => {},
    getCommandConfirmation: async () => ({ review: "yes" } as any),
    onLastResponseId: () => {},
  });

  it("suppresses function_call_output and stops processing once terminate() is invoked", async () => {
    // Stand in for a slow exec: it only finishes when the abort signal fires
    // or after a 2 s fallback, and would then report output.
    vi.spyOn(handleExec, "handleExecCommand").mockImplementation(
      async (
        _args,
        _config,
        _policy,
        _additionalWritableRoots,
        _getConf,
        abortSignal,
      ) => {
        await new Promise<void>((settle) => {
          if (abortSignal?.aborted) {
            settle();
            return;
          }

          const fallback = setTimeout(settle, 2000);
          abortSignal?.addEventListener("abort", () => {
            clearTimeout(fallback);
            settle();
          });
        });

        return { outputText: "should‑not‑happen", metadata: {} } as any;
      },
    );

    // Every item the agent surfaces through onItem is collected here.
    const emitted: Array<any> = [];

    const agent = new AgentLoop({
      ...sharedAgentOptions(),
      onItem: (item) => emitted.push(item),
    });

    const input = userMessage("run long cmd");

    // Kick off the loop without awaiting it.
    agent.run(input as any);

    // Leave a short window for the loop to start and reach the function_call.
    await pause(10);

    agent.terminate();

    // Let any pending promises settle before inspecting the results.
    await pause(50);

    const sawFunctionCallOutput = emitted.some(
      ({ type }) => type === "function_call_output",
    );
    expect(sawFunctionCallOutput).toBe(false);
  });

  it("rejects further run() calls after terminate()", async () => {
    const agent = new AgentLoop({
      ...sharedAgentOptions(),
      onItem: () => {},
    });

    agent.terminate();

    const input = userMessage("noop");

    // run() must fail fast here, whether it throws synchronously or hands
    // back a rejected promise; both paths end up in the catch below.
    const failedFast = await (async () => {
      try {
        await agent.run(input as any);
        return false;
      } catch {
        return true;
      }
    })();

    expect(failedFast).toBe(true);
  });
});
|