feat: add support for ZDR orgs (#481)

- Add `store: boolean` to `AgentLoop` to enable client-side storage of
response items
- Add `--disable-response-storage` arg + `disableResponseStorage` config
This commit is contained in:
Fouad Matin
2025-04-22 01:30:16 -07:00
committed by GitHub
parent 3eba86a553
commit 9f5ccbb618
5 changed files with 153 additions and 5 deletions

View File

@@ -46,6 +46,14 @@ type AgentLoopParams = {
config?: AppConfig;
instructions?: string;
approvalPolicy: ApprovalPolicy;
/**
* Whether the model responses should be stored on the server side (allows
* using `previous_response_id` to provide conversational context). Defaults
* to `true` to preserve the current behaviour. When set to `false` the agent
* will instead send the *full* conversation context as the `input` payload
* on every request and omit the `previous_response_id` parameter.
*/
disableResponseStorage?: boolean;
onItem: (item: ResponseItem) => void;
onLoading: (loading: boolean) => void;
@@ -67,6 +75,8 @@ export class AgentLoop {
private approvalPolicy: ApprovalPolicy;
private config: AppConfig;
private additionalWritableRoots: ReadonlyArray<string>;
/** Whether we ask the API to persist conversation state on the server */
private readonly disableResponseStorage: boolean;
// Using `InstanceType<typeof OpenAI>` sidesteps typing issues with the OpenAI package under
// the TS 5+ `moduleResolution=bundler` setup. OpenAI client instance. We keep the concrete
@@ -97,6 +107,13 @@ export class AgentLoop {
private execAbortController: AbortController | null = null;
/** Set to true when `cancel()` is called so `run()` can exit early. */
private canceled = false;
/**
* Local conversation transcript used when `disableResponseStorage === false`. Holds
* all nonsystem items exchanged so far so we can provide full context on
* every request.
*/
private transcript: Array<ResponseInputItem> = [];
/** Function calls that were emitted by the model but never answered because
* the user cancelled the run. We keep the `call_id`s around so the *next*
* request can send a dummy `function_call_output` that satisfies the
@@ -206,6 +223,7 @@ export class AgentLoop {
provider = "openai",
instructions,
approvalPolicy,
disableResponseStorage,
// `config` used to be required. Some unittests (and potentially other
// callers) instantiate `AgentLoop` without passing it, so we make it
// optional and fall back to sensible defaults. This keeps the public
@@ -240,6 +258,8 @@ export class AgentLoop {
this.onLoading = onLoading;
this.getCommandConfirmation = getCommandConfirmation;
this.onLastResponseId = onLastResponseId;
this.disableResponseStorage = disableResponseStorage ?? false;
this.sessionId = getSessionId() || randomUUID().replaceAll("-", "");
// Configure OpenAI client with optional timeout (ms) from environment
const timeoutMs = OPENAI_TIMEOUT_MS;
@@ -418,7 +438,13 @@ export class AgentLoop {
// accumulate listeners which in turn triggered Node's
// `MaxListenersExceededWarning` after ten invocations.
let lastResponseId: string = previousResponseId;
// Track the response ID from the last *stored* response so we can use
// `previous_response_id` when `disableResponseStorage` is enabled. When storage
// is disabled we deliberately ignore the callersupplied value because
// the backend will not retain any state that could be referenced.
let lastResponseId: string = this.disableResponseStorage
? previousResponseId
: "";
// If there are unresolved function calls from a previously cancelled run
// we have to emit dummy tool outputs so that the API no longer expects
@@ -440,7 +466,48 @@ export class AgentLoop {
this.pendingAborts.clear();
}
let turnInput = [...abortOutputs, ...input];
// Build the input list for this turn. When responses are stored on the
// server we can simply send the *delta* (the new user input as well as
// any pending abort outputs) and rely on `previous_response_id` for
// context. When storage is disabled the server has no memory of the
// conversation, so we must include the *entire* transcript (minus system
// messages) on every call.
let turnInput: Array<ResponseInputItem>;
const stripInternalFields = (
item: ResponseInputItem,
): ResponseInputItem => {
// Clone shallowly and remove fields that are not part of the public
// schema expected by the OpenAI Responses API.
// We shallowclone the item so that subsequent mutations (deleting
// internal fields) do not affect the original object which may still
// be referenced elsewhere (e.g. UI components).
const clean = { ...item } as Record<string, unknown>;
delete clean["duration_ms"];
return clean as unknown as ResponseInputItem;
};
if (this.disableResponseStorage) {
// Ensure the transcript is uptodate with the latest user input so
// that subsequent iterations see a complete history.
const newUserItems: Array<ResponseInputItem> = input.filter((it) => {
if (
(it.type === "message" && it.role !== "system") ||
it.type === "reasoning"
) {
return false;
}
return true;
});
this.transcript.push(...newUserItems);
turnInput = [...this.transcript, ...abortOutputs].map(
stripInternalFields,
);
} else {
turnInput = [...abortOutputs, ...input].map(stripInternalFields);
}
this.onLoading(true);
@@ -471,6 +538,33 @@ export class AgentLoop {
this.onItem(item);
// Mark as delivered so flush won't re-emit it
staged[idx] = undefined;
// When we operate without serverside storage we keep our own
// transcript so we can provide full context on subsequent calls.
if (this.disableResponseStorage) {
// Exclude system messages from transcript as they do not form
// part of the assistant/user dialogue that the model needs.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const role = (item as any).role;
if (role !== "system") {
// Clone the item to avoid mutating the object that is also
// rendered in the UI. We need to strip auxiliary metadata
// such as `duration_ms` which is not part of the Responses
// API schema and therefore causes a 400 error when included
// in subsequent requests whose context is sent verbatim.
const clone: ResponseInputItem = {
...(item as unknown as ResponseInputItem),
} as ResponseInputItem;
// The `duration_ms` field is only added to reasoning items to
// show elapsed time in the UI. It must not be forwarded back
// to the server.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
delete (clone as any).duration_ms;
this.transcript.push(clone);
}
}
}
}, 10);
};
@@ -481,7 +575,11 @@ export class AgentLoop {
return;
}
// send request to openAI
for (const item of turnInput) {
// Only surface the *new* input items to the UI replaying the entire
// transcript would duplicate messages that have already been shown in
// earlier turns.
const deltaInput = [...abortOutputs, ...input];
for (const item of deltaInput) {
stageItem(item as ResponseItem);
}
// Send request to OpenAI with retry on timeout
@@ -520,18 +618,23 @@ export class AgentLoop {
stream = await responseCall({
model: this.model,
instructions: mergedInstructions,
previous_response_id: lastResponseId || undefined,
input: turnInput,
stream: true,
parallel_tool_calls: false,
reasoning,
...(this.config.flexMode ? { service_tier: "flex" } : {}),
...(this.disableResponseStorage
? { store: false }
: {
store: true,
previous_response_id: lastResponseId || undefined,
}),
tools: [
{
type: "function",
name: "shell",
description: "Runs a shell command, and returns its output.",
strict: false,
strict: true,
parameters: {
type: "object",
properties: {

View File

@@ -73,6 +73,8 @@ export type StoredConfig = {
memory?: MemoryConfig;
/** Whether to enable desktop notifications for responses */
notify?: boolean;
/** Disable server-side response storage (send full transcript each request) */
disableResponseStorage?: boolean;
history?: {
maxSize?: number;
saveHistory?: boolean;
@@ -104,6 +106,9 @@ export type AppConfig = {
/** Whether to enable desktop notifications for responses */
notify: boolean;
/** Disable server-side response storage (send full transcript each request) */
disableResponseStorage?: boolean;
/** Enable the "flex-mode" processing mode for supported models (o3, o4-mini) */
flexMode?: boolean;
history?: {
@@ -293,6 +298,7 @@ export const loadConfig = (
instructions: combinedInstructions,
notify: storedConfig.notify === true,
approvalMode: storedConfig.approvalMode,
disableResponseStorage: storedConfig.disableResponseStorage ?? false,
};
// -----------------------------------------------------------------------