fix: agent loop for disable response storage (#543)

- Fixes post-merge of #506

---------

Co-authored-by: Ilan Bigio <ilan@openai.com>
This commit is contained in:
Fouad Matin
2025-04-22 13:49:10 -07:00
committed by GitHub
parent f99c9080fd
commit a30e79b768
2 changed files with 184 additions and 73 deletions

View File

@@ -7,6 +7,7 @@ import type {
ResponseInputItem, ResponseInputItem,
ResponseItem, ResponseItem,
ResponseCreateParams, ResponseCreateParams,
FunctionTool,
} from "openai/resources/responses/responses.mjs"; } from "openai/resources/responses/responses.mjs";
import type { Reasoning } from "openai/resources.mjs"; import type { Reasoning } from "openai/resources.mjs";
@@ -68,6 +69,30 @@ type AgentLoopParams = {
onLastResponseId: (lastResponseId: string) => void; onLastResponseId: (lastResponseId: string) => void;
}; };
// Single `shell` tool definition exposed to the model. Hoisted to module scope
// so every request re-uses the same object instead of rebuilding the literal
// at each call site.
const shellTool: FunctionTool = {
  type: "function",
  name: "shell",
  description: "Runs a shell command, and returns its output.",
  // NOTE(review): non-strict mode — presumably chosen so that `workdir` and
  // `timeout` can stay optional (only `command` is listed in `required`
  // below); confirm against the Responses API strict-schema rules.
  strict: false,
  parameters: {
    type: "object",
    properties: {
      // argv-style command: an array of string tokens.
      command: { type: "array", items: { type: "string" } },
      workdir: {
        type: "string",
        description: "The working directory for the command.",
      },
      timeout: {
        type: "number",
        description:
          "The maximum time to wait for the command to complete in milliseconds.",
      },
    },
    // Only `command` is mandatory; `workdir` and `timeout` are optional.
    required: ["command"],
    additionalProperties: false,
  },
};
export class AgentLoop { export class AgentLoop {
private model: string; private model: string;
private provider: string; private provider: string;
@@ -109,7 +134,7 @@ export class AgentLoop {
private canceled = false; private canceled = false;
/** /**
* Local conversation transcript used when `disableResponseStorage === false`. Holds * Local conversation transcript used when `disableResponseStorage === true`. Holds
* all non-system items exchanged so far so we can provide full context on * all non-system items exchanged so far so we can provide full context on
* every request. * every request.
*/ */
@@ -442,9 +467,13 @@ export class AgentLoop {
// `previous_response_id` when `disableResponseStorage` is enabled. When storage // `previous_response_id` when `disableResponseStorage` is enabled. When storage
// is disabled we deliberately ignore the caller-supplied value because // the backend will not retain any state that could be referenced.
// the backend will not retain any state that could be referenced. // the backend will not retain any state that could be referenced.
// If the backend stores conversation state (`disableResponseStorage === false`) we
// forward the caller-supplied `previousResponseId` so that the model sees the
// full context. When storage is disabled we *must not* send any ID because the
// server no longer retains the referenced response.
let lastResponseId: string = this.disableResponseStorage let lastResponseId: string = this.disableResponseStorage
? previousResponseId ? ""
: ""; : previousResponseId;
// If there are unresolved function calls from a previously cancelled run // If there are unresolved function calls from a previously cancelled run
// we have to emit dummy tool outputs so that the API no longer expects // we have to emit dummy tool outputs so that the API no longer expects
@@ -473,7 +502,11 @@ export class AgentLoop {
// conversation, so we must include the *entire* transcript (minus system // conversation, so we must include the *entire* transcript (minus system
// messages) on every call. // messages) on every call.
let turnInput: Array<ResponseInputItem>; let turnInput: Array<ResponseInputItem> = [];
// Keeps track of how many items in `turnInput` stem from the existing
// transcript so we can avoid re-emitting them to the UI. Only used when
// `disableResponseStorage === true`.
let transcriptPrefixLen = 0;
const stripInternalFields = ( const stripInternalFields = (
item: ResponseInputItem, item: ResponseInputItem,
@@ -485,28 +518,47 @@ export class AgentLoop {
// be referenced elsewhere (e.g. UI components). // be referenced elsewhere (e.g. UI components).
const clean = { ...item } as Record<string, unknown>; const clean = { ...item } as Record<string, unknown>;
delete clean["duration_ms"]; delete clean["duration_ms"];
// Remove OpenAI-assigned identifiers and transient status so the
// backend does not reject items that were never persisted because we
// use `store: false`.
delete clean["id"];
delete clean["status"];
return clean as unknown as ResponseInputItem; return clean as unknown as ResponseInputItem;
}; };
if (this.disableResponseStorage) { if (this.disableResponseStorage) {
// Remember where the existing transcript ends – everything after this
// index in the upcoming `turnInput` list will be *new* for this turn
// and therefore needs to be surfaced to the UI.
transcriptPrefixLen = this.transcript.length;
// Ensure the transcript is up-to-date with the latest user input so // Ensure the transcript is up-to-date with the latest user input so
// that subsequent iterations see a complete history. // that subsequent iterations see a complete history.
const newUserItems: Array<ResponseInputItem> = input.filter((it) => { // `turnInput` is still empty at this point (it will be filled later).
if ( // We need to look at the *input* items the user just supplied.
(it.type === "message" && it.role !== "system") || this.transcript.push(...filterToApiMessages(input));
it.type === "reasoning"
) {
return false;
}
return true;
});
this.transcript.push(...newUserItems);
turnInput = [...this.transcript, ...abortOutputs].map( turnInput = [...this.transcript, ...abortOutputs].map(
stripInternalFields, stripInternalFields,
); );
} else { } else {
turnInput = [...abortOutputs, ...input].map(stripInternalFields); turnInput = [...abortOutputs, ...input].map(stripInternalFields);
// When response storage is disabled we have to maintain our own
// running transcript so that the next request still contains the
// full conversational history. We skipped the transcript update in
// the branch above – ensure we do it here as well.
if (this.disableResponseStorage) {
const newUserItems: Array<ResponseInputItem> = input.filter((it) => {
if (it.type === "message" && it.role === "system") {
return false;
} else if (it.type === "reasoning") {
return false;
}
return true;
});
this.transcript.push(...newUserItems.map(stripInternalFields));
}
} }
this.onLoading(true); this.onLoading(true);
@@ -553,6 +605,24 @@ export class AgentLoop {
// API schema and therefore causes a 400 error when included // API schema and therefore causes a 400 error when included
// in subsequent requests whose context is sent verbatim. // in subsequent requests whose context is sent verbatim.
// Skip items that we have already inserted earlier or that the
// model does not need to see again in the next turn.
// • function_call superseded by the forthcoming
// function_call_output.
// • reasoning – internal only, never sent back.
// • user messages – we added these to the transcript when
// building the first turnInput; stageItem would add a
// duplicate.
if (
(item as ResponseInputItem).type === "function_call" ||
(item as ResponseInputItem).type === "reasoning" ||
((item as ResponseInputItem).type === "message" &&
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(item as any).role === "user")
) {
return;
}
const clone: ResponseInputItem = { const clone: ResponseInputItem = {
...(item as unknown as ResponseInputItem), ...(item as unknown as ResponseInputItem),
} as ResponseInputItem; } as ResponseInputItem;
@@ -578,7 +648,18 @@ export class AgentLoop {
// Only surface the *new* input items to the UI – replaying the entire // `turnInput` holds the *new* items that will be sent to the API in
// transcript would duplicate messages that have already been shown in // transcript would duplicate messages that have already been shown in
// earlier turns. // earlier turns.
const deltaInput = [...abortOutputs, ...input]; // `turnInput` holds the *new* items that will be sent to the API in
// this iteration. Surface exactly these to the UI so that we do not
// reemit messages from previous turns (which would duplicate user
// prompts) and so that freshly generated `function_call_output`s are
// shown immediately.
// Figure out what subset of `turnInput` constitutes *new* information
// for the UI so that we don't spam the interface with repeats of the
// entire transcript on every iteration when response storage is
// disabled.
const deltaInput = this.disableResponseStorage
? turnInput.slice(transcriptPrefixLen)
: [...turnInput];
for (const item of deltaInput) { for (const item of deltaInput) {
stageItem(item as ResponseItem); stageItem(item as ResponseItem);
} }
@@ -629,31 +710,12 @@ export class AgentLoop {
store: true, store: true,
previous_response_id: lastResponseId || undefined, previous_response_id: lastResponseId || undefined,
}), }),
tools: [ tools: [shellTool],
{ // Explicitly tell the model it is allowed to pick whatever
type: "function", // tool it deems appropriate. Omitting this sometimes leads to
name: "shell", // the model ignoring the available tools and responding with
description: "Runs a shell command, and returns its output.", // plain text instead (resulting in a missing toolcall).
strict: true, tool_choice: "auto",
parameters: {
type: "object",
properties: {
command: { type: "array", items: { type: "string" } },
workdir: {
type: "string",
description: "The working directory for the command.",
},
timeout: {
type: "number",
description:
"The maximum time to wait for the command to complete in milliseconds.",
},
},
required: ["command", "workdir", "timeout"],
additionalProperties: false,
},
},
],
}); });
break; break;
} catch (error) { } catch (error) {
@@ -883,13 +945,60 @@ export class AgentLoop {
stageItem(item as ResponseItem); stageItem(item as ResponseItem);
} }
} }
if (event.response.status === "completed") { if (
event.response.status === "completed" ||
(event.response.status as unknown as string) ===
"requires_action"
) {
// TODO: remove this once we can depend on streaming events // TODO: remove this once we can depend on streaming events
const newTurnInput = await this.processEventsWithoutStreaming( const newTurnInput = await this.processEventsWithoutStreaming(
event.response.output, event.response.output,
stageItem, stageItem,
); );
turnInput = newTurnInput;
// When we do not use serverside storage we maintain our
// own transcript so that *future* turns still contain full
// conversational context. However, whether we advance to
// another loop iteration should depend solely on the
// presence of *new* input items (i.e. items that were not
// part of the previous request). Resending the transcript
// by itself would create an infinite request loop because
// `turnInput.length` would never reach zero.
if (this.disableResponseStorage) {
// 1) Append the freshly emitted output to our local
// transcript (minus non-message items the model does
// not need to see again).
const cleaned = filterToApiMessages(
event.response.output.map(stripInternalFields),
);
this.transcript.push(...cleaned);
// 2) Determine the *delta* (newTurnInput) that must be
// sent in the next iteration. If there is none we can
// safely terminate the loop – the transcript alone
// does not constitute new information for the
// assistant to act upon.
const delta = filterToApiMessages(
newTurnInput.map(stripInternalFields),
);
if (delta.length === 0) {
// No new input => end conversation.
turnInput = [];
} else {
// Resend full transcript *plus* the new delta so the
// stateless backend receives complete context.
turnInput = [...this.transcript, ...delta];
// The prefix ends at the current transcript length –
// everything after this index is new for the next
// iteration.
transcriptPrefixLen = this.transcript.length;
}
} else {
turnInput = newTurnInput;
}
} }
lastResponseId = event.response.id; lastResponseId = event.response.id;
this.onLastResponseId(event.response.id); this.onLastResponseId(event.response.id);
@@ -951,45 +1060,27 @@ export class AgentLoop {
params as ResponseCreateParams & { stream: true }, params as ResponseCreateParams & { stream: true },
); );
log(
"agentLoop.run(): responseCall(1): turnInput: " +
JSON.stringify(turnInput),
);
// eslint-disable-next-line no-await-in-loop // eslint-disable-next-line no-await-in-loop
stream = await responseCall({ stream = await responseCall({
model: this.model, model: this.model,
instructions: mergedInstructions, instructions: mergedInstructions,
previous_response_id: lastResponseId || undefined,
input: turnInput, input: turnInput,
stream: true, stream: true,
parallel_tool_calls: false, parallel_tool_calls: false,
reasoning, reasoning,
...(this.config.flexMode ? { service_tier: "flex" } : {}), ...(this.config.flexMode ? { service_tier: "flex" } : {}),
tools: [ ...(this.disableResponseStorage
{ ? { store: false }
type: "function", : {
name: "shell", store: true,
description: previous_response_id: lastResponseId || undefined,
"Runs a shell command, and returns its output.", }),
strict: false, tools: [shellTool],
parameters: { tool_choice: "auto",
type: "object",
properties: {
command: {
type: "array",
items: { type: "string" },
},
workdir: {
type: "string",
description: "The working directory for the command.",
},
timeout: {
type: "number",
description:
"The maximum time to wait for the command to complete in milliseconds.",
},
},
required: ["command"],
additionalProperties: false,
},
},
],
}); });
this.currentStream = stream; this.currentStream = stream;
@@ -1393,3 +1484,17 @@ You MUST adhere to the following criteria when executing the task:
- When your task involves writing or modifying files: - When your task involves writing or modifying files:
- Do NOT tell the user to "save the file" or "copy the code into a file" if you already created or modified the file using \`apply_patch\`. Instead, reference the file as already saved. - Do NOT tell the user to "save the file" or "copy the code into a file" if you already created or modified the file using \`apply_patch\`. Instead, reference the file as already saved.
- Do NOT show the full contents of large files you have already written, unless the user explicitly asks for them.`; - Do NOT show the full contents of large files you have already written, unless the user explicitly asks for them.`;
/**
 * Strips items the API should not be sent again when we maintain our own
 * transcript: system messages and internal `reasoning` items. Everything
 * else passes through unchanged, preserving order.
 */
function filterToApiMessages(
  items: Array<ResponseInputItem>,
): Array<ResponseInputItem> {
  const isSystemMessage = (entry: ResponseInputItem): boolean =>
    entry.type === "message" && entry.role === "system";
  return items.filter(
    (entry) => entry.type !== "reasoning" && !isSystemMessage(entry),
  );
}

View File

@@ -81,7 +81,13 @@ export function parseToolCallArguments(
} }
const { cmd, command } = json as Record<string, unknown>; const { cmd, command } = json as Record<string, unknown>;
const commandArray = toStringArray(cmd) ?? toStringArray(command); // The OpenAI model sometimes produces a single string instead of an array.
// Accept both shapes:
const commandArray =
toStringArray(cmd) ??
toStringArray(command) ??
(typeof cmd === "string" ? [cmd] : undefined) ??
(typeof command === "string" ? [command] : undefined);
if (commandArray == null) { if (commandArray == null) {
return undefined; return undefined;
} }