From 9b0ccf9aebff1396a05eded989921bf79af47292 Mon Sep 17 00:00:00 2001
From: moppywhip <48742547+moppywhip@users.noreply.github.com>
Date: Sat, 26 Apr 2025 12:14:50 -0400
Subject: [PATCH] fix: duplicate messages in quiet mode (#680)

Addressing #600 and #664 (partially)

## Bug
Codex was staging duplicate items in output running when the same
response item appeared in both the streaming events. Specifically:

1. Items would be staged once when received as a
`response.output_item.done` event
2. The same items would be staged again when included in the final
`response.completed` payload

This duplication would result in each message being sent several times
in the quiet mode output.

## Changes
- Added a Set (`alreadyStagedItemIds`) to track items that have already
been staged
- Modified the `stageItem` function to check if an item's ID is already
in this set before staging it
- Added a regression test (`agent-dedupe-items.test.ts`) that verifies
items with the same ID are only staged once

## Testing
Like other tests, the included test creates a mock OpenAI stream that
emits the same message twice (once as an incremental event and once in
the final response) and verifies the item is only passed to `onItem`
once.
---
 codex-cli/src/utils/agent/agent-loop.ts    |   7 ++
 codex-cli/tests/agent-dedupe-items.test.ts | 115 +++++++++++++++++++++
 2 files changed, 122 insertions(+)
 create mode 100644 codex-cli/tests/agent-dedupe-items.test.ts
diff --git a/codex-cli/src/utils/agent/agent-loop.ts b/codex-cli/src/utils/agent/agent-loop.ts
index b56c3efc..5fca0016 100644
--- a/codex-cli/src/utils/agent/agent-loop.ts
+++ b/codex-cli/src/utils/agent/agent-loop.ts
@@ -46,6 +46,7 @@ export type CommandConfirmation = {
 };
 
 const alreadyProcessedResponses = new Set();
+const alreadyStagedItemIds = new Set<string>();
 
 type AgentLoopParams = {
   model: string;
@@ -562,6 +563,12 @@ export class AgentLoop {
           return;
         }
 
+        // Skip items we've already processed to avoid staging duplicates
+        if (item.id && alreadyStagedItemIds.has(item.id)) {
+          return;
+        }
+        alreadyStagedItemIds.add(item.id);
+
         // Store the item so the final flush can still operate on a complete list.
         // We'll nil out entries once they're delivered.
         const idx = staged.push(item) - 1;
diff --git a/codex-cli/tests/agent-dedupe-items.test.ts b/codex-cli/tests/agent-dedupe-items.test.ts
new file mode 100644
index 00000000..148aa744
--- /dev/null
+++ b/codex-cli/tests/agent-dedupe-items.test.ts
@@ -0,0 +1,115 @@
+import { describe, it, expect, vi } from "vitest";
+
+// ---------------------------------------------------------------------------
+// This regression test ensures that AgentLoop only surfaces each response item
+// once even when the same item appears multiple times in the OpenAI streaming
+// response (e.g. as an early `response.output_item.done` event *and* again in
+// the final `response.completed` payload).
+// ---------------------------------------------------------------------------
+
+// Fake OpenAI stream that emits the *same* message twice: first as an
+// incremental output event and then again in the turn completion payload.
+class FakeStream {
+  public controller = { abort: vi.fn() };
+
+  async *[Symbol.asyncIterator]() {
+    // 1) Early incremental item.
+    yield {
+      type: "response.output_item.done",
+      item: {
+        type: "message",
+        id: "call-dedupe-1",
+        role: "assistant",
+        content: [{ type: "input_text", text: "Hello!" }],
+      },
+    } as any;
+
+    // 2) Turn completion containing the *same* item again.
+    yield {
+      type: "response.completed",
+      response: {
+        id: "resp-dedupe-1",
+        status: "completed",
+        output: [
+          {
+            type: "message",
+            id: "call-dedupe-1",
+            role: "assistant",
+            content: [{ type: "input_text", text: "Hello!" }],
+          },
+        ],
+      },
+    } as any;
+  }
+}
+
+// Intercept the OpenAI SDK used inside AgentLoop so we can inject our fake
+// streaming implementation.
+vi.mock("openai", () => {
+  class FakeOpenAI {
+    public responses = {
+      create: async () => new FakeStream(),
+    };
+  }
+
+  class APIConnectionTimeoutError extends Error {}
+
+  return { __esModule: true, default: FakeOpenAI, APIConnectionTimeoutError };
+});
+
+// Stub approvals / formatting helpers – not relevant here.
+vi.mock("../src/approvals.js", () => ({
+  __esModule: true,
+  alwaysApprovedCommands: new Set<string>(),
+  canAutoApprove: () => ({ type: "auto-approve", runInSandbox: false }) as any,
+  isSafeCommand: () => null,
+}));
+
+vi.mock("../src/format-command.js", () => ({
+  __esModule: true,
+  formatCommandForDisplay: (cmd: Array<string>) => cmd.join(" "),
+}));
+
+vi.mock("../src/utils/agent/log.js", () => ({
+  __esModule: true,
+  log: () => {},
+  isLoggingEnabled: () => false,
+}));
+
+// After the dependency mocks we can import the module under test.
+import { AgentLoop } from "../src/utils/agent/agent-loop.js";
+
+describe("AgentLoop deduplicates output items", () => {
+  it("invokes onItem exactly once for duplicate items with the same id", async () => {
+    const received: Array<any> = [];
+
+    const agent = new AgentLoop({
+      model: "any",
+      instructions: "",
+      config: { model: "any", instructions: "", notify: false },
+      approvalPolicy: { mode: "auto" } as any,
+      additionalWritableRoots: [],
+      onItem: (item) => received.push(item),
+      onLoading: () => {},
+      getCommandConfirmation: async () => ({ review: "yes" }) as any,
+      onLastResponseId: () => {},
+    });
+
+    const userMsg = [
+      {
+        type: "message",
+        role: "user",
+        content: [{ type: "input_text", text: "hi" }],
+      },
+    ];
+
+    await agent.run(userMsg as any);
+
+    // Give the setTimeout(3ms) inside AgentLoop.stageItem a chance to fire.
+    await new Promise((r) => setTimeout(r, 20));
+
+    // Count how many times the duplicate item surfaced.
+    const appearances = received.filter((i) => i.id === "call-dedupe-1").length;
+    expect(appearances).toBe(1);
+  });
+});