diff --git a/codex-cli/src/utils/agent/agent-loop.ts b/codex-cli/src/utils/agent/agent-loop.ts index b56c3efc..5fca0016 100644 --- a/codex-cli/src/utils/agent/agent-loop.ts +++ b/codex-cli/src/utils/agent/agent-loop.ts @@ -46,6 +46,7 @@ export type CommandConfirmation = { }; const alreadyProcessedResponses = new Set(); +const alreadyStagedItemIds = new Set(); type AgentLoopParams = { model: string; @@ -562,6 +563,12 @@ export class AgentLoop { return; } + // Skip items we've already processed to avoid staging duplicates + if (item.id && alreadyStagedItemIds.has(item.id)) { + return; + } + alreadyStagedItemIds.add(item.id); + // Store the item so the final flush can still operate on a complete list. // We'll nil out entries once they're delivered. const idx = staged.push(item) - 1; diff --git a/codex-cli/tests/agent-dedupe-items.test.ts b/codex-cli/tests/agent-dedupe-items.test.ts new file mode 100644 index 00000000..148aa744 --- /dev/null +++ b/codex-cli/tests/agent-dedupe-items.test.ts @@ -0,0 +1,115 @@ +import { describe, it, expect, vi } from "vitest"; + +// --------------------------------------------------------------------------- +// This regression test ensures that AgentLoop only surfaces each response item +// once even when the same item appears multiple times in the OpenAI streaming +// response (e.g. as an early `response.output_item.done` event *and* again in +// the final `response.completed` payload). +// --------------------------------------------------------------------------- + +// Fake OpenAI stream that emits the *same* message twice: first as an +// incremental output event and then again in the turn completion payload. +class FakeStream { + public controller = { abort: vi.fn() }; + + async *[Symbol.asyncIterator]() { + // 1) Early incremental item. + yield { + type: "response.output_item.done", + item: { + type: "message", + id: "call-dedupe-1", + role: "assistant", + content: [{ type: "input_text", text: "Hello!" }], + }, + } as any; + + // 2) Turn completion containing the *same* item again. + yield { + type: "response.completed", + response: { + id: "resp-dedupe-1", + status: "completed", + output: [ + { + type: "message", + id: "call-dedupe-1", + role: "assistant", + content: [{ type: "input_text", text: "Hello!" }], + }, + ], + }, + } as any; + } +} + +// Intercept the OpenAI SDK used inside AgentLoop so we can inject our fake +// streaming implementation. +vi.mock("openai", () => { + class FakeOpenAI { + public responses = { + create: async () => new FakeStream(), + }; + } + + class APIConnectionTimeoutError extends Error {} + + return { __esModule: true, default: FakeOpenAI, APIConnectionTimeoutError }; +}); + +// Stub approvals / formatting helpers – not relevant here. +vi.mock("../src/approvals.js", () => ({ + __esModule: true, + alwaysApprovedCommands: new Set(), + canAutoApprove: () => ({ type: "auto-approve", runInSandbox: false }) as any, + isSafeCommand: () => null, +})); + +vi.mock("../src/format-command.js", () => ({ + __esModule: true, + formatCommandForDisplay: (cmd: Array) => cmd.join(" "), +})); + +vi.mock("../src/utils/agent/log.js", () => ({ + __esModule: true, + log: () => {}, + isLoggingEnabled: () => false, +})); + +// After the dependency mocks we can import the module under test. +import { AgentLoop } from "../src/utils/agent/agent-loop.js"; + +describe("AgentLoop deduplicates output items", () => { + it("invokes onItem exactly once for duplicate items with the same id", async () => { + const received: Array = []; + + const agent = new AgentLoop({ + model: "any", + instructions: "", + config: { model: "any", instructions: "", notify: false }, + approvalPolicy: { mode: "auto" } as any, + additionalWritableRoots: [], + onItem: (item) => received.push(item), + onLoading: () => {}, + getCommandConfirmation: async () => ({ review: "yes" }) as any, + onLastResponseId: () => {}, + }); + + const userMsg = [ + { + type: "message", + role: "user", + content: [{ type: "input_text", text: "hi" }], + }, + ]; + + await agent.run(userMsg as any); + + // Give the setTimeout(3ms) inside AgentLoop.stageItem a chance to fire. + await new Promise((r) => setTimeout(r, 20)); + + // Count how many times the duplicate item surfaced. + const appearances = received.filter((i) => i.id === "call-dedupe-1").length; + expect(appearances).toBe(1); + }); +});