add: codex-mini-latest (#951)

💽

---------

Co-authored-by: Trevor Creech <tcreech@openai.com>
Author: Fouad Matin (committed by GitHub)
Date: 2025-05-16 08:04:00 -07:00
Parent: ce2ecbe72f
Commit: 070499f534

9 changed files with 141 additions and 20 deletions

View File

@@ -56,7 +56,7 @@ const cli = meow(
     --version                  Print version and exit
     -h, --help                 Show usage and exit
-    -m, --model <model>        Model to use for completions (default: o4-mini)
+    -m, --model <model>        Model to use for completions (default: codex-mini-latest)
     -p, --provider <provider>  Provider to use for completions (default: openai)
     -i, --image <path>         Path(s) to image files to include as input
     -v, --view <rollout>       Inspect a previously saved rollout instead of starting a session

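For orientation: a minimal sketch of how the parsed flag and the new default are assumed to combine. This wiring is not shown in the diff; `cli` is the meow result from the hunk above, and the import path is illustrative.

```ts
import { DEFAULT_AGENTIC_MODEL } from "./utils/config";

// Assumed wiring: fall back to the default model when -m/--model is absent.
// This commit changes that default from "o4-mini" to "codex-mini-latest".
const model: string = cli.flags.model ?? DEFAULT_AGENTIC_MODEL;
```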
View File

@@ -19,6 +19,7 @@ import { parse, setOptions } from "marked";
 import TerminalRenderer from "marked-terminal";
 import path from "path";
 import React, { useEffect, useMemo } from "react";
+import { formatCommandForDisplay } from "src/format-command.js";
 import supportsHyperlinks from "supports-hyperlinks";
 
 export default function TerminalChatResponseItem({
@@ -41,8 +42,12 @@ export default function TerminalChatResponseItem({
           fileOpener={fileOpener}
         />
       );
+    // @ts-expect-error new item types aren't in SDK yet
+    case "local_shell_call":
     case "function_call":
       return <TerminalChatResponseToolCall message={item} />;
+    // @ts-expect-error new item types aren't in SDK yet
+    case "local_shell_call_output":
     case "function_call_output":
       return (
         <TerminalChatResponseToolCallOutput
@@ -166,21 +171,28 @@ function TerminalChatResponseMessage({
 function TerminalChatResponseToolCall({
   message,
 }: {
-  message: ResponseFunctionToolCallItem;
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  message: ResponseFunctionToolCallItem | any;
 }) {
-  const details = parseToolCall(message);
+  let workdir: string | undefined;
+  let cmdReadableText: string | undefined;
+  if (message.type === "function_call") {
+    const details = parseToolCall(message);
+    workdir = details?.workdir;
+    cmdReadableText = details?.cmdReadableText;
+  } else if (message.type === "local_shell_call") {
+    const action = message.action;
+    workdir = action.working_directory;
+    cmdReadableText = formatCommandForDisplay(action.command);
+  }
   return (
     <Box flexDirection="column" gap={1}>
       <Text color="magentaBright" bold>
         command
-        {details?.workdir ? (
-          <Text dimColor>{` (${details?.workdir})`}</Text>
-        ) : (
-          ""
-        )}
+        {workdir ? <Text dimColor>{` (${workdir})`}</Text> : ""}
       </Text>
       <Text>
-        <Text dimColor>$</Text> {details?.cmdReadableText}
+        <Text dimColor>$</Text> {cmdReadableText}
       </Text>
     </Box>
   );
@@ -190,7 +202,8 @@ function TerminalChatResponseToolCallOutput({
   message,
   fullStdout,
 }: {
-  message: ResponseFunctionToolCallOutputItem;
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  message: ResponseFunctionToolCallOutputItem | any;
   fullStdout: boolean;
 }) {
   const { output, metadata } = parseToolCallOutput(message.output);

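For reference, a sketch of the two item shapes this component now distinguishes. They are not yet in the SDK (hence the `any` escape hatches), and the field names below are reconstructed from the property accesses in the diff:

```ts
// Reconstructed shapes; not official SDK types.
type LocalShellCall = {
  type: "local_shell_call";
  call_id: string;
  action: {
    type: "exec";
    command: Array<string>; // argv, rendered via formatCommandForDisplay
    working_directory?: string;
    timeout_ms?: number;
  };
};

type LocalShellCallOutput = {
  type: "local_shell_call_output";
  call_id: string;
  output: string; // JSON string of { output, metadata }, parsed by parseToolCallOutput
};
```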
View File

@@ -8,6 +8,7 @@ import type {
   ResponseItem,
   ResponseCreateParams,
   FunctionTool,
+  Tool,
 } from "openai/resources/responses/responses.mjs";
 import type { Reasoning } from "openai/resources.mjs";
@@ -84,7 +85,7 @@ type AgentLoopParams = {
   onLastResponseId: (lastResponseId: string) => void;
 };
 
-const shellTool: FunctionTool = {
+const shellFunctionTool: FunctionTool = {
   type: "function",
   name: "shell",
   description: "Runs a shell command, and returns its output.",
@@ -108,6 +109,11 @@ const shellTool: FunctionTool = {
   },
 };
 
+const localShellTool: Tool = {
+  // @ts-expect-error - waiting on sdk
+  type: "local_shell",
+};
+
 export class AgentLoop {
   private model: string;
   private provider: string;
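The practical difference between the two tool declarations: `shell` is a plain function tool, so the model fills in a free-form JSON `arguments` string that the CLI must parse, while `local_shell` is a built-in tool type that the codex models emit as structured `local_shell_call` items, which is why it carries no parameter schema. A hedged sketch of the two request shapes:

```ts
// Function tool: replies arrive as `function_call` items with a JSON
// `arguments` string to parse before executing anything.
const withFunctionTool = { tools: [shellFunctionTool], tool_choice: "auto" };

// Built-in tool: codex models reply with `local_shell_call` items that
// already carry argv, working directory, and timeout as typed fields.
const withLocalShell = { tools: [localShellTool], tool_choice: "auto" };
```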
@@ -461,6 +467,73 @@ export class AgentLoop {
     return [outputItem, ...additionalItems];
   }
 
+  private async handleLocalShellCall(
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    item: any,
+  ): Promise<Array<ResponseInputItem>> {
+    // If the agent has been canceled in the meantime we should not perform
+    // any additional work. Returning an empty array ensures that we neither
+    // execute the requested tool call nor enqueue any follow-up input items.
+    // This keeps the cancellation semantics intuitive: once the user
+    // interrupts a task, no further actions related to that task should be
+    // taken.
+    if (this.canceled) {
+      return [];
+    }
+
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    const outputItem: any = {
+      type: "local_shell_call_output",
+      // `call_id` is mandatory; ensure we never send `undefined`, which
+      // would trigger the "No tool output found…" 400 from the API.
+      call_id: item.call_id,
+      output: "no function found",
+    };
+
+    // We intentionally *do not* remove this `callId` from the
+    // `pendingAborts` set right away. The output produced below is only
+    // queued up for the *next* request to the OpenAI API; it has not been
+    // delivered yet. If the user presses ESC-ESC (i.e. invokes `cancel()`)
+    // in the small window between queuing the result and the actual network
+    // call, we need to be able to surface a synthetic
+    // `function_call_output` marked as "aborted". Keeping the ID in the set
+    // until the run concludes successfully lets the next `run()`
+    // differentiate between an aborted tool call (needs the synthetic
+    // output) and a completed one (cleared below in the `flush()` helper).
+
+    // used to tell model to stop if needed
+    const additionalItems: Array<ResponseInputItem> = [];
+
+    if (item.action.type !== "exec") {
+      throw new Error("Invalid action type");
+    }
+
+    const args = {
+      cmd: item.action.command,
+      workdir: item.action.working_directory,
+      timeoutInMillis: item.action.timeout_ms,
+    };
+    const {
+      outputText,
+      metadata,
+      additionalItems: additionalItemsFromExec,
+    } = await handleExecCommand(
+      args,
+      this.config,
+      this.approvalPolicy,
+      this.additionalWritableRoots,
+      this.getCommandConfirmation,
+      this.execAbortController?.signal,
+    );
+    outputItem.output = JSON.stringify({ output: outputText, metadata });
+
+    if (additionalItemsFromExec) {
+      additionalItems.push(...additionalItemsFromExec);
+    }
+
+    return [outputItem, ...additionalItems];
+  }
+
   public async run(
     input: Array<ResponseInputItem>,
     previousResponseId: string = "",
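To make the mapping concrete, a hypothetical `local_shell_call` item and what `handleLocalShellCall` derives from it. The values are invented for illustration; the field mapping mirrors the code above.

```ts
const incoming = {
  type: "local_shell_call",
  call_id: "call_123", // echoed back on the output item
  action: {
    type: "exec",
    command: ["rg", "--files", "src"],
    working_directory: "/workspace/app",
    timeout_ms: 10_000,
  },
};

// Passed to handleExecCommand:
//   { cmd: ["rg", "--files", "src"], workdir: "/workspace/app", timeoutInMillis: 10000 }
//
// Queued for the next API request once the command finishes:
//   { type: "local_shell_call_output", call_id: "call_123",
//     output: JSON.stringify({ output: outputText, metadata }) }
```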
@@ -545,6 +618,11 @@ export class AgentLoop {
       // `disableResponseStorage === true`.
       let transcriptPrefixLen = 0;
 
+      let tools: Array<Tool> = [shellFunctionTool];
+      if (this.model.startsWith("codex")) {
+        tools = [localShellTool];
+      }
+
       const stripInternalFields = (
         item: ResponseInputItem,
       ): ResponseInputItem => {
@@ -648,6 +726,8 @@ export class AgentLoop {
           if (
             (item as ResponseInputItem).type === "function_call" ||
             (item as ResponseInputItem).type === "reasoning" ||
+            // @ts-expect-error - waiting on sdk
+            (item as ResponseInputItem).type === "local_shell_call" ||
             ((item as ResponseInputItem).type === "message" &&
               // eslint-disable-next-line @typescript-eslint/no-explicit-any
               (item as any).role === "user")
@@ -748,7 +828,7 @@ export class AgentLoop {
             store: true,
             previous_response_id: lastResponseId || undefined,
           }),
-          tools: [shellTool],
+          tools: tools,
           // Explicitly tell the model it is allowed to pick whatever
           // tool it deems appropriate. Omitting this sometimes leads to
           // the model ignoring the available tools and responding with
@@ -968,7 +1048,10 @@ export class AgentLoop {
             if (maybeReasoning.type === "reasoning") {
               maybeReasoning.duration_ms = Date.now() - thinkingStart;
             }
-            if (item.type === "function_call") {
+            if (
+              item.type === "function_call" ||
+              item.type === "local_shell_call"
+            ) {
               // Track outstanding tool call so we can abort later if needed.
               // The item comes from the streaming response, therefore it has
               // either `id` (chat) or `call_id` (responses); we normalise
@@ -1091,7 +1174,11 @@ export class AgentLoop {
     let reasoning: Reasoning | undefined;
     if (this.model.startsWith("o")) {
       reasoning = { effort: "high" };
-      if (this.model === "o3" || this.model === "o4-mini") {
+      if (
+        this.model === "o3" ||
+        this.model === "o4-mini" ||
+        this.model === "codex-mini-latest"
+      ) {
         reasoning.summary = "auto";
       }
     }
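Isolated from the class, the reasoning-parameter selection reduces to roughly the following sketch. Note that the enclosing `startsWith("o")` guard is unchanged by this hunk, and "codex-mini-latest" does not start with "o", so the new comparison only fires where that outer check also passes:

```ts
import type { Reasoning } from "openai/resources.mjs";

// Sketch of the logic above, not the actual extracted function.
function reasoningFor(model: string): Reasoning | undefined {
  if (!model.startsWith("o")) {
    return undefined;
  }
  const reasoning: Reasoning = { effort: "high" };
  if (model === "o3" || model === "o4-mini" || model === "codex-mini-latest") {
    reasoning.summary = "auto";
  }
  return reasoning;
}
```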
@@ -1130,7 +1217,7 @@ export class AgentLoop {
         store: true,
         previous_response_id: lastResponseId || undefined,
       }),
-      tools: [shellTool],
+      tools: tools,
       tool_choice: "auto",
     });
@@ -1492,6 +1579,17 @@ export class AgentLoop {
           // eslint-disable-next-line no-await-in-loop
           const result = await this.handleFunctionCall(item);
           turnInput.push(...result);
+          // @ts-expect-error - waiting on sdk
+        } else if (item.type === "local_shell_call") {
+          // @ts-expect-error - waiting on sdk
+          if (alreadyProcessedResponses.has(item.id)) {
+            continue;
+          }
+          // @ts-expect-error - waiting on sdk
+          alreadyProcessedResponses.add(item.id);
+          // eslint-disable-next-line no-await-in-loop
+          const result = await this.handleLocalShellCall(item);
+          turnInput.push(...result);
         }
         emitItem(item as ResponseItem);
       }

View File

@@ -43,7 +43,7 @@ if (!isVitest) {
   loadDotenv({ path: USER_WIDE_CONFIG_PATH });
 }
 
-export const DEFAULT_AGENTIC_MODEL = "o4-mini";
+export const DEFAULT_AGENTIC_MODEL = "codex-mini-latest";
 export const DEFAULT_FULL_CONTEXT_MODEL = "gpt-4.1";
 export const DEFAULT_APPROVAL_MODE = AutoApprovalMode.SUGGEST;
 export const DEFAULT_INSTRUCTIONS = "";

View File

@@ -19,6 +19,10 @@ export const openAiModelInfo = {
     label: "o3 (2025-04-16)",
     maxContextLength: 200000,
   },
+  "codex-mini-latest": {
+    label: "codex-mini-latest",
+    maxContextLength: 200000,
+  },
   "o4-mini": {
     label: "o4 Mini",
     maxContextLength: 200000,

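A minimal sketch of how a registry entry like this is typically consumed; the helper and its fallback value are hypothetical, only the entry shape comes from the diff:

```ts
type ModelInfo = { label: string; maxContextLength: number };

// Hypothetical lookup with a conservative fallback for unknown models.
function maxContextLengthFor(
  info: Record<string, ModelInfo>,
  model: string,
): number {
  return info[model]?.maxContextLength ?? 8192;
}

// e.g. maxContextLengthFor(openAiModelInfo, "codex-mini-latest") === 200000
```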
View File

@@ -67,7 +67,7 @@ test("loads default config if files don't exist", () => {
   });
 
   // Keep the test focused on just checking that default model and instructions are loaded
   // so we need to make sure we check just these properties
-  expect(config.model).toBe("o4-mini");
+  expect(config.model).toBe("codex-mini-latest");
   expect(config.instructions).toBe("");
 });

View File

@@ -29,7 +29,7 @@ describe.each([
 ])("AgentLoop with disableResponseStorage=%s", ({ flag, title }) => {
   /* build a fresh config for each case */
   const cfg: AppConfig = {
-    model: "o4-mini",
+    model: "codex-mini-latest",
     provider: "openai",
     instructions: "",
     disableResponseStorage: flag,

View File

@@ -21,7 +21,10 @@ describe("disableResponseStorage persistence", () => {
   mkdirSync(codexDir, { recursive: true });
 
   // seed YAML with ZDR enabled
-  writeFileSync(yamlPath, "model: o4-mini\ndisableResponseStorage: true\n");
+  writeFileSync(
+    yamlPath,
+    "model: codex-mini-latest\ndisableResponseStorage: true\n",
+  );
 });
 
 afterAll((): void => {

View File

@@ -44,7 +44,10 @@ describe("model-utils offline resilience", () => {
     "../src/utils/model-utils.js"
   );
-  const supported = await isModelSupportedForResponses("openai", "o4-mini");
+  const supported = await isModelSupportedForResponses(
+    "openai",
+    "codex-mini-latest",
+  );
   expect(supported).toBe(true);
 });