From 9b0ccf9aebff1396a05eded989921bf79af47292 Mon Sep 17 00:00:00 2001 From: moppywhip <48742547+moppywhip@users.noreply.github.com> Date: Sat, 26 Apr 2025 12:14:50 -0400 Subject: [PATCH] fix: duplicate messages in quiet mode (#680) Addressing #600 and #664 (partially) ## Bug Codex was staging duplicate items in output running when the same response item appeared in both the streaming events. Specifically: 1. Items would be staged once when received as a `response.output_item.done` event 2. The same items would be staged again when included in the final `response.completed` payload This duplication would result in each message being sent several times in the quiet mode output. ## Changes - Added a Set (`alreadyStagedItemIds`) to track items that have already been staged - Modified the `stageItem` function to check if an item's ID is already in this set before staging it - Added a regression test (`agent-dedupe-items.test.ts`) that verifies items with the same ID are only staged once ## Testing Like other tests, the included test creates a mock OpenAI stream that emits the same message twice (once as an incremental event and once in the final response) and verifies the item is only passed to `onItem` once. --- codex-cli/src/utils/agent/agent-loop.ts | 7 ++ codex-cli/tests/agent-dedupe-items.test.ts | 115 +++++++++++++++++++++ 2 files changed, 122 insertions(+) create mode 100644 codex-cli/tests/agent-dedupe-items.test.ts diff --git a/codex-cli/src/utils/agent/agent-loop.ts b/codex-cli/src/utils/agent/agent-loop.ts index b56c3efc..5fca0016 100644 --- a/codex-cli/src/utils/agent/agent-loop.ts +++ b/codex-cli/src/utils/agent/agent-loop.ts @@ -46,6 +46,7 @@ export type CommandConfirmation = { }; const alreadyProcessedResponses = new Set(); +const alreadyStagedItemIds = new Set(); type AgentLoopParams = { model: string; @@ -562,6 +563,12 @@ export class AgentLoop { return; } + // Skip items we've already processed to avoid staging duplicates + if (item.id && alreadyStagedItemIds.has(item.id)) { + return; + } + alreadyStagedItemIds.add(item.id); + // Store the item so the final flush can still operate on a complete list. // We'll nil out entries once they're delivered. const idx = staged.push(item) - 1; diff --git a/codex-cli/tests/agent-dedupe-items.test.ts b/codex-cli/tests/agent-dedupe-items.test.ts new file mode 100644 index 00000000..148aa744 --- /dev/null +++ b/codex-cli/tests/agent-dedupe-items.test.ts @@ -0,0 +1,115 @@ +import { describe, it, expect, vi } from "vitest"; + +// --------------------------------------------------------------------------- +// This regression test ensures that AgentLoop only surfaces each response item +// once even when the same item appears multiple times in the OpenAI streaming +// response (e.g. as an early `response.output_item.done` event *and* again in +// the final `response.completed` payload). +// --------------------------------------------------------------------------- + +// Fake OpenAI stream that emits the *same* message twice: first as an +// incremental output event and then again in the turn completion payload. +class FakeStream { + public controller = { abort: vi.fn() }; + + async *[Symbol.asyncIterator]() { + // 1) Early incremental item. + yield { + type: "response.output_item.done", + item: { + type: "message", + id: "call-dedupe-1", + role: "assistant", + content: [{ type: "input_text", text: "Hello!" }], + }, + } as any; + + // 2) Turn completion containing the *same* item again. + yield { + type: "response.completed", + response: { + id: "resp-dedupe-1", + status: "completed", + output: [ + { + type: "message", + id: "call-dedupe-1", + role: "assistant", + content: [{ type: "input_text", text: "Hello!" }], + }, + ], + }, + } as any; + } +} + +// Intercept the OpenAI SDK used inside AgentLoop so we can inject our fake +// streaming implementation. +vi.mock("openai", () => { + class FakeOpenAI { + public responses = { + create: async () => new FakeStream(), + }; + } + + class APIConnectionTimeoutError extends Error {} + + return { __esModule: true, default: FakeOpenAI, APIConnectionTimeoutError }; +}); + +// Stub approvals / formatting helpers – not relevant here. +vi.mock("../src/approvals.js", () => ({ + __esModule: true, + alwaysApprovedCommands: new Set(), + canAutoApprove: () => ({ type: "auto-approve", runInSandbox: false }) as any, + isSafeCommand: () => null, +})); + +vi.mock("../src/format-command.js", () => ({ + __esModule: true, + formatCommandForDisplay: (cmd: Array) => cmd.join(" "), +})); + +vi.mock("../src/utils/agent/log.js", () => ({ + __esModule: true, + log: () => {}, + isLoggingEnabled: () => false, +})); + +// After the dependency mocks we can import the module under test. +import { AgentLoop } from "../src/utils/agent/agent-loop.js"; + +describe("AgentLoop deduplicates output items", () => { + it("invokes onItem exactly once for duplicate items with the same id", async () => { + const received: Array = []; + + const agent = new AgentLoop({ + model: "any", + instructions: "", + config: { model: "any", instructions: "", notify: false }, + approvalPolicy: { mode: "auto" } as any, + additionalWritableRoots: [], + onItem: (item) => received.push(item), + onLoading: () => {}, + getCommandConfirmation: async () => ({ review: "yes" }) as any, + onLastResponseId: () => {}, + }); + + const userMsg = [ + { + type: "message", + role: "user", + content: [{ type: "input_text", text: "hi" }], + }, + ]; + + await agent.run(userMsg as any); + + // Give the setTimeout(3ms) inside AgentLoop.stageItem a chance to fire. + await new Promise((r) => setTimeout(r, 20)); + + // Count how many times the duplicate item surfaced. + const appearances = received.filter((i) => i.id === "call-dedupe-1").length; + expect(appearances).toBe(1); + }); +});