add: codex-mini-latest (#951)
💽 --------- Co-authored-by: Trevor Creech <tcreech@openai.com>
This commit is contained in:
@@ -56,7 +56,7 @@ const cli = meow(
|
|||||||
--version Print version and exit
|
--version Print version and exit
|
||||||
|
|
||||||
-h, --help Show usage and exit
|
-h, --help Show usage and exit
|
||||||
-m, --model <model> Model to use for completions (default: o4-mini)
|
-m, --model <model> Model to use for completions (default: codex-mini-latest)
|
||||||
-p, --provider <provider> Provider to use for completions (default: openai)
|
-p, --provider <provider> Provider to use for completions (default: openai)
|
||||||
-i, --image <path> Path(s) to image files to include as input
|
-i, --image <path> Path(s) to image files to include as input
|
||||||
-v, --view <rollout> Inspect a previously saved rollout instead of starting a session
|
-v, --view <rollout> Inspect a previously saved rollout instead of starting a session
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ import { parse, setOptions } from "marked";
|
|||||||
import TerminalRenderer from "marked-terminal";
|
import TerminalRenderer from "marked-terminal";
|
||||||
import path from "path";
|
import path from "path";
|
||||||
import React, { useEffect, useMemo } from "react";
|
import React, { useEffect, useMemo } from "react";
|
||||||
|
import { formatCommandForDisplay } from "src/format-command.js";
|
||||||
import supportsHyperlinks from "supports-hyperlinks";
|
import supportsHyperlinks from "supports-hyperlinks";
|
||||||
|
|
||||||
export default function TerminalChatResponseItem({
|
export default function TerminalChatResponseItem({
|
||||||
@@ -41,8 +42,12 @@ export default function TerminalChatResponseItem({
|
|||||||
fileOpener={fileOpener}
|
fileOpener={fileOpener}
|
||||||
/>
|
/>
|
||||||
);
|
);
|
||||||
|
// @ts-expect-error new item types aren't in SDK yet
|
||||||
|
case "local_shell_call":
|
||||||
case "function_call":
|
case "function_call":
|
||||||
return <TerminalChatResponseToolCall message={item} />;
|
return <TerminalChatResponseToolCall message={item} />;
|
||||||
|
// @ts-expect-error new item types aren't in SDK yet
|
||||||
|
case "local_shell_call_output":
|
||||||
case "function_call_output":
|
case "function_call_output":
|
||||||
return (
|
return (
|
||||||
<TerminalChatResponseToolCallOutput
|
<TerminalChatResponseToolCallOutput
|
||||||
@@ -166,21 +171,28 @@ function TerminalChatResponseMessage({
|
|||||||
function TerminalChatResponseToolCall({
|
function TerminalChatResponseToolCall({
|
||||||
message,
|
message,
|
||||||
}: {
|
}: {
|
||||||
message: ResponseFunctionToolCallItem;
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||||
|
message: ResponseFunctionToolCallItem | any;
|
||||||
}) {
|
}) {
|
||||||
const details = parseToolCall(message);
|
let workdir: string | undefined;
|
||||||
|
let cmdReadableText: string | undefined;
|
||||||
|
if (message.type === "function_call") {
|
||||||
|
const details = parseToolCall(message);
|
||||||
|
workdir = details?.workdir;
|
||||||
|
cmdReadableText = details?.cmdReadableText;
|
||||||
|
} else if (message.type === "local_shell_call") {
|
||||||
|
const action = message.action;
|
||||||
|
workdir = action.working_directory;
|
||||||
|
cmdReadableText = formatCommandForDisplay(action.command);
|
||||||
|
}
|
||||||
return (
|
return (
|
||||||
<Box flexDirection="column" gap={1}>
|
<Box flexDirection="column" gap={1}>
|
||||||
<Text color="magentaBright" bold>
|
<Text color="magentaBright" bold>
|
||||||
command
|
command
|
||||||
{details?.workdir ? (
|
{workdir ? <Text dimColor>{` (${workdir})`}</Text> : ""}
|
||||||
<Text dimColor>{` (${details?.workdir})`}</Text>
|
|
||||||
) : (
|
|
||||||
""
|
|
||||||
)}
|
|
||||||
</Text>
|
</Text>
|
||||||
<Text>
|
<Text>
|
||||||
<Text dimColor>$</Text> {details?.cmdReadableText}
|
<Text dimColor>$</Text> {cmdReadableText}
|
||||||
</Text>
|
</Text>
|
||||||
</Box>
|
</Box>
|
||||||
);
|
);
|
||||||
@@ -190,7 +202,8 @@ function TerminalChatResponseToolCallOutput({
|
|||||||
message,
|
message,
|
||||||
fullStdout,
|
fullStdout,
|
||||||
}: {
|
}: {
|
||||||
message: ResponseFunctionToolCallOutputItem;
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||||
|
message: ResponseFunctionToolCallOutputItem | any;
|
||||||
fullStdout: boolean;
|
fullStdout: boolean;
|
||||||
}) {
|
}) {
|
||||||
const { output, metadata } = parseToolCallOutput(message.output);
|
const { output, metadata } = parseToolCallOutput(message.output);
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ import type {
|
|||||||
ResponseItem,
|
ResponseItem,
|
||||||
ResponseCreateParams,
|
ResponseCreateParams,
|
||||||
FunctionTool,
|
FunctionTool,
|
||||||
|
Tool,
|
||||||
} from "openai/resources/responses/responses.mjs";
|
} from "openai/resources/responses/responses.mjs";
|
||||||
import type { Reasoning } from "openai/resources.mjs";
|
import type { Reasoning } from "openai/resources.mjs";
|
||||||
|
|
||||||
@@ -84,7 +85,7 @@ type AgentLoopParams = {
|
|||||||
onLastResponseId: (lastResponseId: string) => void;
|
onLastResponseId: (lastResponseId: string) => void;
|
||||||
};
|
};
|
||||||
|
|
||||||
const shellTool: FunctionTool = {
|
const shellFunctionTool: FunctionTool = {
|
||||||
type: "function",
|
type: "function",
|
||||||
name: "shell",
|
name: "shell",
|
||||||
description: "Runs a shell command, and returns its output.",
|
description: "Runs a shell command, and returns its output.",
|
||||||
@@ -108,6 +109,11 @@ const shellTool: FunctionTool = {
|
|||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const localShellTool: Tool = {
|
||||||
|
//@ts-expect-error - waiting on sdk
|
||||||
|
type: "local_shell",
|
||||||
|
};
|
||||||
|
|
||||||
export class AgentLoop {
|
export class AgentLoop {
|
||||||
private model: string;
|
private model: string;
|
||||||
private provider: string;
|
private provider: string;
|
||||||
@@ -461,6 +467,73 @@ export class AgentLoop {
|
|||||||
return [outputItem, ...additionalItems];
|
return [outputItem, ...additionalItems];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private async handleLocalShellCall(
|
||||||
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||||
|
item: any,
|
||||||
|
): Promise<Array<ResponseInputItem>> {
|
||||||
|
// If the agent has been canceled in the meantime we should not perform any
|
||||||
|
// additional work. Returning an empty array ensures that we neither execute
|
||||||
|
// the requested tool call nor enqueue any follow‑up input items. This keeps
|
||||||
|
// the cancellation semantics intuitive for users – once they interrupt a
|
||||||
|
// task no further actions related to that task should be taken.
|
||||||
|
if (this.canceled) {
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
|
||||||
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||||
|
const outputItem: any = {
|
||||||
|
type: "local_shell_call_output",
|
||||||
|
// `call_id` is mandatory – ensure we never send `undefined` which would
|
||||||
|
// trigger the "No tool output found…" 400 from the API.
|
||||||
|
call_id: item.call_id,
|
||||||
|
output: "no function found",
|
||||||
|
};
|
||||||
|
|
||||||
|
// We intentionally *do not* remove this `callId` from the `pendingAborts`
|
||||||
|
// set right away. The output produced below is only queued up for the
|
||||||
|
// *next* request to the OpenAI API – it has not been delivered yet. If
|
||||||
|
// the user presses ESC‑ESC (i.e. invokes `cancel()`) in the small window
|
||||||
|
// between queuing the result and the actual network call, we need to be
|
||||||
|
// able to surface a synthetic `function_call_output` marked as
|
||||||
|
// "aborted". Keeping the ID in the set until the run concludes
|
||||||
|
// successfully lets the next `run()` differentiate between an aborted
|
||||||
|
// tool call (needs the synthetic output) and a completed one (cleared
|
||||||
|
// below in the `flush()` helper).
|
||||||
|
|
||||||
|
// used to tell model to stop if needed
|
||||||
|
const additionalItems: Array<ResponseInputItem> = [];
|
||||||
|
|
||||||
|
if (item.action.type !== "exec") {
|
||||||
|
throw new Error("Invalid action type");
|
||||||
|
}
|
||||||
|
|
||||||
|
const args = {
|
||||||
|
cmd: item.action.command,
|
||||||
|
workdir: item.action.working_directory,
|
||||||
|
timeoutInMillis: item.action.timeout_ms,
|
||||||
|
};
|
||||||
|
|
||||||
|
const {
|
||||||
|
outputText,
|
||||||
|
metadata,
|
||||||
|
additionalItems: additionalItemsFromExec,
|
||||||
|
} = await handleExecCommand(
|
||||||
|
args,
|
||||||
|
this.config,
|
||||||
|
this.approvalPolicy,
|
||||||
|
this.additionalWritableRoots,
|
||||||
|
this.getCommandConfirmation,
|
||||||
|
this.execAbortController?.signal,
|
||||||
|
);
|
||||||
|
outputItem.output = JSON.stringify({ output: outputText, metadata });
|
||||||
|
|
||||||
|
if (additionalItemsFromExec) {
|
||||||
|
additionalItems.push(...additionalItemsFromExec);
|
||||||
|
}
|
||||||
|
|
||||||
|
return [outputItem, ...additionalItems];
|
||||||
|
}
|
||||||
|
|
||||||
public async run(
|
public async run(
|
||||||
input: Array<ResponseInputItem>,
|
input: Array<ResponseInputItem>,
|
||||||
previousResponseId: string = "",
|
previousResponseId: string = "",
|
||||||
@@ -545,6 +618,11 @@ export class AgentLoop {
|
|||||||
// `disableResponseStorage === true`.
|
// `disableResponseStorage === true`.
|
||||||
let transcriptPrefixLen = 0;
|
let transcriptPrefixLen = 0;
|
||||||
|
|
||||||
|
let tools: Array<Tool> = [shellFunctionTool];
|
||||||
|
if (this.model.startsWith("codex")) {
|
||||||
|
tools = [localShellTool];
|
||||||
|
}
|
||||||
|
|
||||||
const stripInternalFields = (
|
const stripInternalFields = (
|
||||||
item: ResponseInputItem,
|
item: ResponseInputItem,
|
||||||
): ResponseInputItem => {
|
): ResponseInputItem => {
|
||||||
@@ -648,6 +726,8 @@ export class AgentLoop {
|
|||||||
if (
|
if (
|
||||||
(item as ResponseInputItem).type === "function_call" ||
|
(item as ResponseInputItem).type === "function_call" ||
|
||||||
(item as ResponseInputItem).type === "reasoning" ||
|
(item as ResponseInputItem).type === "reasoning" ||
|
||||||
|
//@ts-expect-error - waiting on sdk
|
||||||
|
(item as ResponseInputItem).type === "local_shell_call" ||
|
||||||
((item as ResponseInputItem).type === "message" &&
|
((item as ResponseInputItem).type === "message" &&
|
||||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||||
(item as any).role === "user")
|
(item as any).role === "user")
|
||||||
@@ -748,7 +828,7 @@ export class AgentLoop {
|
|||||||
store: true,
|
store: true,
|
||||||
previous_response_id: lastResponseId || undefined,
|
previous_response_id: lastResponseId || undefined,
|
||||||
}),
|
}),
|
||||||
tools: [shellTool],
|
tools: tools,
|
||||||
// Explicitly tell the model it is allowed to pick whatever
|
// Explicitly tell the model it is allowed to pick whatever
|
||||||
// tool it deems appropriate. Omitting this sometimes leads to
|
// tool it deems appropriate. Omitting this sometimes leads to
|
||||||
// the model ignoring the available tools and responding with
|
// the model ignoring the available tools and responding with
|
||||||
@@ -968,7 +1048,10 @@ export class AgentLoop {
|
|||||||
if (maybeReasoning.type === "reasoning") {
|
if (maybeReasoning.type === "reasoning") {
|
||||||
maybeReasoning.duration_ms = Date.now() - thinkingStart;
|
maybeReasoning.duration_ms = Date.now() - thinkingStart;
|
||||||
}
|
}
|
||||||
if (item.type === "function_call") {
|
if (
|
||||||
|
item.type === "function_call" ||
|
||||||
|
item.type === "local_shell_call"
|
||||||
|
) {
|
||||||
// Track outstanding tool call so we can abort later if needed.
|
// Track outstanding tool call so we can abort later if needed.
|
||||||
// The item comes from the streaming response, therefore it has
|
// The item comes from the streaming response, therefore it has
|
||||||
// either `id` (chat) or `call_id` (responses) – we normalise
|
// either `id` (chat) or `call_id` (responses) – we normalise
|
||||||
@@ -1091,7 +1174,11 @@ export class AgentLoop {
|
|||||||
let reasoning: Reasoning | undefined;
|
let reasoning: Reasoning | undefined;
|
||||||
if (this.model.startsWith("o")) {
|
if (this.model.startsWith("o")) {
|
||||||
reasoning = { effort: "high" };
|
reasoning = { effort: "high" };
|
||||||
if (this.model === "o3" || this.model === "o4-mini") {
|
if (
|
||||||
|
this.model === "o3" ||
|
||||||
|
this.model === "o4-mini" ||
|
||||||
|
this.model === "codex-mini-latest"
|
||||||
|
) {
|
||||||
reasoning.summary = "auto";
|
reasoning.summary = "auto";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1130,7 +1217,7 @@ export class AgentLoop {
|
|||||||
store: true,
|
store: true,
|
||||||
previous_response_id: lastResponseId || undefined,
|
previous_response_id: lastResponseId || undefined,
|
||||||
}),
|
}),
|
||||||
tools: [shellTool],
|
tools: tools,
|
||||||
tool_choice: "auto",
|
tool_choice: "auto",
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -1492,6 +1579,17 @@ export class AgentLoop {
|
|||||||
// eslint-disable-next-line no-await-in-loop
|
// eslint-disable-next-line no-await-in-loop
|
||||||
const result = await this.handleFunctionCall(item);
|
const result = await this.handleFunctionCall(item);
|
||||||
turnInput.push(...result);
|
turnInput.push(...result);
|
||||||
|
//@ts-expect-error - waiting on sdk
|
||||||
|
} else if (item.type === "local_shell_call") {
|
||||||
|
//@ts-expect-error - waiting on sdk
|
||||||
|
if (alreadyProcessedResponses.has(item.id)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
//@ts-expect-error - waiting on sdk
|
||||||
|
alreadyProcessedResponses.add(item.id);
|
||||||
|
// eslint-disable-next-line no-await-in-loop
|
||||||
|
const result = await this.handleLocalShellCall(item);
|
||||||
|
turnInput.push(...result);
|
||||||
}
|
}
|
||||||
emitItem(item as ResponseItem);
|
emitItem(item as ResponseItem);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -43,7 +43,7 @@ if (!isVitest) {
|
|||||||
loadDotenv({ path: USER_WIDE_CONFIG_PATH });
|
loadDotenv({ path: USER_WIDE_CONFIG_PATH });
|
||||||
}
|
}
|
||||||
|
|
||||||
export const DEFAULT_AGENTIC_MODEL = "o4-mini";
|
export const DEFAULT_AGENTIC_MODEL = "codex-mini-latest";
|
||||||
export const DEFAULT_FULL_CONTEXT_MODEL = "gpt-4.1";
|
export const DEFAULT_FULL_CONTEXT_MODEL = "gpt-4.1";
|
||||||
export const DEFAULT_APPROVAL_MODE = AutoApprovalMode.SUGGEST;
|
export const DEFAULT_APPROVAL_MODE = AutoApprovalMode.SUGGEST;
|
||||||
export const DEFAULT_INSTRUCTIONS = "";
|
export const DEFAULT_INSTRUCTIONS = "";
|
||||||
|
|||||||
@@ -19,6 +19,10 @@ export const openAiModelInfo = {
|
|||||||
label: "o3 (2025-04-16)",
|
label: "o3 (2025-04-16)",
|
||||||
maxContextLength: 200000,
|
maxContextLength: 200000,
|
||||||
},
|
},
|
||||||
|
"codex-mini-latest": {
|
||||||
|
label: "codex-mini-latest",
|
||||||
|
maxContextLength: 200000,
|
||||||
|
},
|
||||||
"o4-mini": {
|
"o4-mini": {
|
||||||
label: "o4 Mini",
|
label: "o4 Mini",
|
||||||
maxContextLength: 200000,
|
maxContextLength: 200000,
|
||||||
|
|||||||
@@ -67,7 +67,7 @@ test("loads default config if files don't exist", () => {
|
|||||||
});
|
});
|
||||||
// Keep the test focused on just checking that default model and instructions are loaded
|
// Keep the test focused on just checking that default model and instructions are loaded
|
||||||
// so we need to make sure we check just these properties
|
// so we need to make sure we check just these properties
|
||||||
expect(config.model).toBe("o4-mini");
|
expect(config.model).toBe("codex-mini-latest");
|
||||||
expect(config.instructions).toBe("");
|
expect(config.instructions).toBe("");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -29,7 +29,7 @@ describe.each([
|
|||||||
])("AgentLoop with disableResponseStorage=%s", ({ flag, title }) => {
|
])("AgentLoop with disableResponseStorage=%s", ({ flag, title }) => {
|
||||||
/* build a fresh config for each case */
|
/* build a fresh config for each case */
|
||||||
const cfg: AppConfig = {
|
const cfg: AppConfig = {
|
||||||
model: "o4-mini",
|
model: "codex-mini-latest",
|
||||||
provider: "openai",
|
provider: "openai",
|
||||||
instructions: "",
|
instructions: "",
|
||||||
disableResponseStorage: flag,
|
disableResponseStorage: flag,
|
||||||
|
|||||||
@@ -21,7 +21,10 @@ describe("disableResponseStorage persistence", () => {
|
|||||||
mkdirSync(codexDir, { recursive: true });
|
mkdirSync(codexDir, { recursive: true });
|
||||||
|
|
||||||
// seed YAML with ZDR enabled
|
// seed YAML with ZDR enabled
|
||||||
writeFileSync(yamlPath, "model: o4-mini\ndisableResponseStorage: true\n");
|
writeFileSync(
|
||||||
|
yamlPath,
|
||||||
|
"model: codex-mini-latest\ndisableResponseStorage: true\n",
|
||||||
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
afterAll((): void => {
|
afterAll((): void => {
|
||||||
|
|||||||
@@ -44,7 +44,10 @@ describe("model-utils – offline resilience", () => {
|
|||||||
"../src/utils/model-utils.js"
|
"../src/utils/model-utils.js"
|
||||||
);
|
);
|
||||||
|
|
||||||
const supported = await isModelSupportedForResponses("openai", "o4-mini");
|
const supported = await isModelSupportedForResponses(
|
||||||
|
"openai",
|
||||||
|
"codex-mini-latest",
|
||||||
|
);
|
||||||
expect(supported).toBe(true);
|
expect(supported).toBe(true);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user