fix: duplicate messages in quiet mode (#680)
Addresses #600 and, partially, #664. ## Bug Codex staged duplicate items in its output when the same response item appeared in more than one streaming event. Specifically: 1. An item was staged once when it was received in a `response.output_item.done` event 2. The same item was staged again when it was included in the final `response.completed` payload As a result, each message was emitted several times in the quiet mode output. ## Changes - Added a Set (`alreadyStagedItemIds`) to track the IDs of items that have already been staged - Modified the `stageItem` function to check whether an item's ID is already in this set before staging it - Added a regression test (`agent-dedupe-items.test.ts`) that verifies items with the same ID are only staged once ## Testing Like other tests, the included test creates a mock OpenAI stream that emits the same message twice (once as an incremental event and once in the final response) and verifies the item is only passed to `onItem` once.
This commit is contained in:
@@ -46,6 +46,7 @@ export type CommandConfirmation = {
|
||||
};
|
||||
|
||||
const alreadyProcessedResponses = new Set();
|
||||
const alreadyStagedItemIds = new Set<string>();
|
||||
|
||||
type AgentLoopParams = {
|
||||
model: string;
|
||||
@@ -562,6 +563,12 @@ export class AgentLoop {
|
||||
return;
|
||||
}
|
||||
|
||||
// Skip items we've already processed to avoid staging duplicates
|
||||
if (item.id && alreadyStagedItemIds.has(item.id)) {
|
||||
return;
|
||||
}
|
||||
alreadyStagedItemIds.add(item.id);
|
||||
|
||||
// Store the item so the final flush can still operate on a complete list.
|
||||
// We'll nil out entries once they're delivered.
|
||||
const idx = staged.push(item) - 1;
|
||||
|
||||
115
codex-cli/tests/agent-dedupe-items.test.ts
Normal file
115
codex-cli/tests/agent-dedupe-items.test.ts
Normal file
@@ -0,0 +1,115 @@
|
||||
import { describe, it, expect, vi } from "vitest";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// This regression test ensures that AgentLoop only surfaces each response item
|
||||
// once even when the same item appears multiple times in the OpenAI streaming
|
||||
// response (e.g. as an early `response.output_item.done` event *and* again in
|
||||
// the final `response.completed` payload).
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Stand-in for the OpenAI response stream. It replays one assistant message
 * twice under the same id: first on its own in a `response.output_item.done`
 * event, then inside the `output` array of the closing `response.completed`
 * event.
 */
class FakeStream {
  public controller = { abort: vi.fn() };

  async *[Symbol.asyncIterator]() {
    // Build a fresh literal per event so the two events never share an object.
    const duplicatedMessage = () => ({
      type: "message",
      id: "call-dedupe-1",
      role: "assistant",
      content: [{ type: "input_text", text: "Hello!" }],
    });

    // First sighting: the item arrives as an incremental output event.
    const incrementalEvent = {
      type: "response.output_item.done",
      item: duplicatedMessage(),
    } as any;
    yield incrementalEvent;

    // Second sighting: the turn completion payload lists the same item again.
    const completionEvent = {
      type: "response.completed",
      response: {
        id: "resp-dedupe-1",
        status: "completed",
        output: [duplicatedMessage()],
      },
    } as any;
    yield completionEvent;
  }
}
|
||||
|
||||
// Swap the OpenAI SDK used inside AgentLoop for a minimal double whose
// `responses.create` resolves to the fake stream defined in this file.
vi.mock("openai", () => {
  class APIConnectionTimeoutError extends Error {}

  class FakeOpenAI {
    public responses = {
      async create() {
        return new FakeStream();
      },
    };
  }

  return { __esModule: true, default: FakeOpenAI, APIConnectionTimeoutError };
});
|
||||
|
||||
// The approval helpers are not relevant to this test: stub them so every
// command is reported as auto-approved without a sandbox.
vi.mock("../src/approvals.js", () => {
  const alwaysApprovedCommands = new Set<string>();
  const canAutoApprove = () =>
    ({ type: "auto-approve", runInSandbox: false }) as any;
  const isSafeCommand = () => null;

  return {
    __esModule: true,
    alwaysApprovedCommands,
    canAutoApprove,
    isSafeCommand,
  };
});
|
||||
|
||||
// Formatting is not relevant to this test: display a command as its
// space-joined parts.
vi.mock("../src/format-command.js", () => {
  const formatCommandForDisplay = (cmd: Array<string>) => cmd.join(" ");

  return { __esModule: true, formatCommandForDisplay };
});
|
||||
|
||||
// Replace the agent log module with a no-op logger that reports logging as
// disabled.
vi.mock("../src/utils/agent/log.js", () => {
  const log = () => {};
  const isLoggingEnabled = () => false;

  return { __esModule: true, log, isLoggingEnabled };
});
|
||||
|
||||
// After the dependency mocks we can import the module under test.
|
||||
import { AgentLoop } from "../src/utils/agent/agent-loop.js";
|
||||
|
||||
describe("AgentLoop deduplicates output items", () => {
  it("invokes onItem exactly once for duplicate items with the same id", async () => {
    // Id shared by both copies of the message emitted by the fake stream.
    const duplicatedId = "call-dedupe-1";

    // Every item AgentLoop hands to `onItem`, in delivery order.
    const surfaced: Array<any> = [];
    const noop = () => {};

    const agent = new AgentLoop({
      model: "any",
      instructions: "",
      config: { model: "any", instructions: "", notify: false },
      approvalPolicy: { mode: "auto" } as any,
      additionalWritableRoots: [],
      onItem: (item) => surfaced.push(item),
      onLoading: noop,
      getCommandConfirmation: async () => ({ review: "yes" }) as any,
      onLastResponseId: noop,
    });

    // A single user turn is enough to make the agent open the (fake) stream.
    const userTurn = {
      type: "message",
      role: "user",
      content: [{ type: "input_text", text: "hi" }],
    };
    await agent.run([userTurn] as any);

    // Give the setTimeout(3ms) inside AgentLoop.stageItem a chance to fire.
    await new Promise((resolve) => setTimeout(resolve, 20));

    // Tally how often the duplicated item reached the callback.
    let appearances = 0;
    for (const entry of surfaced) {
      if (entry.id === duplicatedId) {
        appearances += 1;
      }
    }
    expect(appearances).toBe(1);
  });
});
|
||||
Reference in New Issue
Block a user