add: codex-mini-latest (#951)

💽 --------- Co-authored-by: Trevor Creech <tcreech@openai.com>
2025-05-16 08:04:00 -07:00
parent ce2ecbe72f
commit 070499f534
9 changed files with 141 additions and 20 deletions
--- a/codex-cli/src/cli.tsx
+++ b/codex-cli/src/cli.tsx
@@ -56,7 +56,7 @@ const cli = meow(
    --version                       Print version and exit
    -h, --help                      Show usage and exit
-    -m, --model <model>             Model to use for completions (default: o4-mini)
+    -m, --model <model>             Model to use for completions (default: codex-mini-latest)
    -p, --provider <provider>       Provider to use for completions (default: openai)
    -i, --image <path>              Path(s) to image files to include as input
    -v, --view <rollout>            Inspect a previously saved rollout instead of starting a session
--- a/codex-cli/src/components/chat/terminal-chat-response-item.tsx
+++ b/codex-cli/src/components/chat/terminal-chat-response-item.tsx
@@ -19,6 +19,7 @@ import { parse, setOptions } from "marked";
 import TerminalRenderer from "marked-terminal";
 import path from "path";
 import React, { useEffect, useMemo } from "react";
 import { formatCommandForDisplay } from "src/format-command.js";
 import supportsHyperlinks from "supports-hyperlinks";
 export default function TerminalChatResponseItem({
@@ -41,8 +42,12 @@ export default function TerminalChatResponseItem({
          fileOpener={fileOpener}
        />
      );
    // @ts-expect-error new item types aren't in SDK yet
    case "local_shell_call":
    case "function_call":
      return <TerminalChatResponseToolCall message={item} />;
    // @ts-expect-error new item types aren't in SDK yet
    case "local_shell_call_output":
    case "function_call_output":
      return (
        <TerminalChatResponseToolCallOutput
@@ -166,21 +171,28 @@ function TerminalChatResponseMessage({
 function TerminalChatResponseToolCall({
  message,
 }: {
-  message: ResponseFunctionToolCallItem;
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  message: ResponseFunctionToolCallItem | any;
 }) {
-  const details = parseToolCall(message);
+  let workdir: string | undefined;
  let cmdReadableText: string | undefined;
  if (message.type === "function_call") {
    const details = parseToolCall(message);
    workdir = details?.workdir;
    cmdReadableText = details?.cmdReadableText;
  } else if (message.type === "local_shell_call") {
    const action = message.action;
    workdir = action.working_directory;
    cmdReadableText = formatCommandForDisplay(action.command);
  }
  return (
    <Box flexDirection="column" gap={1}>
      <Text color="magentaBright" bold>
        command
-        {details?.workdir ? (
+        {workdir ? <Text dimColor>{` (${workdir})`}</Text> : ""}
-          <Text dimColor>{` (${details?.workdir})`}</Text>
-        ) : (
-          ""
-        )}
      </Text>
      <Text>
-        <Text dimColor>$</Text> {details?.cmdReadableText}
+        <Text dimColor>$</Text> {cmdReadableText}
      </Text>
    </Box>
  );
@@ -190,7 +202,8 @@ function TerminalChatResponseToolCallOutput({
  message,
  fullStdout,
 }: {
-  message: ResponseFunctionToolCallOutputItem;
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  message: ResponseFunctionToolCallOutputItem | any;
  fullStdout: boolean;
 }) {
  const { output, metadata } = parseToolCallOutput(message.output);
--- a/codex-cli/src/utils/agent/agent-loop.ts
+++ b/codex-cli/src/utils/agent/agent-loop.ts
@@ -8,6 +8,7 @@ import type {
  ResponseItem,
  ResponseCreateParams,
  FunctionTool,
  Tool,
 } from "openai/resources/responses/responses.mjs";
 import type { Reasoning } from "openai/resources.mjs";
@@ -84,7 +85,7 @@ type AgentLoopParams = {
  onLastResponseId: (lastResponseId: string) => void;
 };
-const shellTool: FunctionTool = {
+const shellFunctionTool: FunctionTool = {
  type: "function",
  name: "shell",
  description: "Runs a shell command, and returns its output.",
@@ -108,6 +109,11 @@ const shellTool: FunctionTool = {
  },
 };
 // Tool entry whose `type` is "local_shell". The agent loop puts this in the
 // `tools` list instead of `shellFunctionTool` when `this.model` starts with
 // "codex". The `@ts-expect-error` below must stay directly above the `type`
 // line: it suppresses the type error for a `type` value that, per the
 // "waiting on sdk" note, the SDK's `Tool` type does not accept yet.
 const localShellTool: Tool = {
  //@ts-expect-error - waiting on sdk
  type: "local_shell",
 };
 export class AgentLoop {
  private model: string;
  private provider: string;
@@ -461,6 +467,73 @@ export class AgentLoop {
    return [outputItem, ...additionalItems];
  }
  /**
   * Runs the command requested by a `local_shell_call` item and wraps the
   * result in a `local_shell_call_output` input item.
   *
   * @param item The untyped `local_shell_call` item. This method reads
   *   `item.call_id` and, from `item.action`, the fields `type`, `command`,
   *   `working_directory` and `timeout_ms`.
   * @returns An empty array when the loop has already been canceled;
   *   otherwise the output item followed by any additional items returned by
   *   `handleExecCommand`.
   * @throws Error with message "Invalid action type" when `item.action.type`
   *   is anything other than `"exec"`.
   */
  private async handleLocalShellCall(
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    item: any,
  ): Promise<Array<ResponseInputItem>> {
    // After cancellation, neither execute the command nor queue follow-up
    // input for it.
    if (this.canceled) {
      return [];
    }

    // The reply echoes the incoming `call_id`. `output` starts as a
    // placeholder and is replaced with the real result further down.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const shellCallOutput: any = {
      type: "local_shell_call_output",
      call_id: item.call_id,
      output: "no function found",
    };

    // "exec" is the only action type handled here.
    const { action } = item;
    if (action.type !== "exec") {
      throw new Error("Invalid action type");
    }

    // Map the action's snake_case fields onto the argument names that
    // `handleExecCommand` is called with.
    const execResult = await handleExecCommand(
      {
        cmd: action.command,
        workdir: action.working_directory,
        timeoutInMillis: action.timeout_ms,
      },
      this.config,
      this.approvalPolicy,
      this.additionalWritableRoots,
      this.getCommandConfirmation,
      this.execAbortController?.signal,
    );

    // The output text and its metadata are sent as one JSON string.
    shellCallOutput.output = JSON.stringify({
      output: execResult.outputText,
      metadata: execResult.metadata,
    });

    // Any extra items from the exec step are appended after the output item.
    const followUpItems: Array<ResponseInputItem> = execResult.additionalItems
      ? [...execResult.additionalItems]
      : [];

    return [shellCallOutput, ...followUpItems];
  }
  public async run(
    input: Array<ResponseInputItem>,
    previousResponseId: string = "",
@@ -545,6 +618,11 @@ export class AgentLoop {
      // `disableResponseStorage === true`.
      let transcriptPrefixLen = 0;
      let tools: Array<Tool> = [shellFunctionTool];
      if (this.model.startsWith("codex")) {
        tools = [localShellTool];
      }
      const stripInternalFields = (
        item: ResponseInputItem,
      ): ResponseInputItem => {
@@ -648,6 +726,8 @@ export class AgentLoop {
                if (
                  (item as ResponseInputItem).type === "function_call" ||
                  (item as ResponseInputItem).type === "reasoning" ||
                  //@ts-expect-error - waiting on sdk
                  (item as ResponseInputItem).type === "local_shell_call" ||
                  ((item as ResponseInputItem).type === "message" &&
                    // eslint-disable-next-line @typescript-eslint/no-explicit-any
                    (item as any).role === "user")
@@ -748,7 +828,7 @@ export class AgentLoop {
                    store: true,
                    previous_response_id: lastResponseId || undefined,
                  }),
-              tools: [shellTool],
+              tools: tools,
              // Explicitly tell the model it is allowed to pick whatever
              // tool it deems appropriate.  Omitting this sometimes leads to
              // the model ignoring the available tools and responding with
@@ -968,7 +1048,10 @@ export class AgentLoop {
                if (maybeReasoning.type === "reasoning") {
                  maybeReasoning.duration_ms = Date.now() - thinkingStart;
                }
-                if (item.type === "function_call") {
+                if (
                  item.type === "function_call" ||
                  item.type === "local_shell_call"
                ) {
                  // Track outstanding tool call so we can abort later if needed.
                  // The item comes from the streaming response, therefore it has
                  // either `id` (chat) or `call_id` (responses) – we normalise
@@ -1091,7 +1174,11 @@ export class AgentLoop {
              // Reasoning options sent with the request. They stay `undefined`
              // unless the model name starts with "o" or "codex".
              let reasoning: Reasoning | undefined;
              // The "codex" prefix has to be accepted here as well:
              // "codex-mini-latest" does not start with "o", so gating on the
              // "o" prefix alone made the `codex-mini-latest` comparison below
              // unreachable and that model never received `summary: "auto"`.
              if (
                this.model.startsWith("o") ||
                this.model.startsWith("codex")
              ) {
                reasoning = { effort: "high" };
-                if (this.model === "o3" || this.model === "o4-mini") {
+                if (
                  this.model === "o3" ||
                  this.model === "o4-mini" ||
                  this.model === "codex-mini-latest"
                ) {
                  reasoning.summary = "auto";
                }
              }
@@ -1130,7 +1217,7 @@ export class AgentLoop {
                      store: true,
                      previous_response_id: lastResponseId || undefined,
                    }),
-                tools: [shellTool],
+                tools: tools,
                tool_choice: "auto",
              });
@@ -1492,6 +1579,17 @@ export class AgentLoop {
        // eslint-disable-next-line no-await-in-loop
        const result = await this.handleFunctionCall(item);
        turnInput.push(...result);
        //@ts-expect-error - waiting on sdk
      } else if (item.type === "local_shell_call") {
        //@ts-expect-error - waiting on sdk
        if (alreadyProcessedResponses.has(item.id)) {
          continue;
        }
        //@ts-expect-error - waiting on sdk
        alreadyProcessedResponses.add(item.id);
        // eslint-disable-next-line no-await-in-loop
        const result = await this.handleLocalShellCall(item);
        turnInput.push(...result);
      }
      emitItem(item as ResponseItem);
    }
--- a/codex-cli/src/utils/config.ts
+++ b/codex-cli/src/utils/config.ts
@@ -43,7 +43,7 @@ if (!isVitest) {
  loadDotenv({ path: USER_WIDE_CONFIG_PATH });
 }
-export const DEFAULT_AGENTIC_MODEL = "o4-mini";
+export const DEFAULT_AGENTIC_MODEL = "codex-mini-latest";
 export const DEFAULT_FULL_CONTEXT_MODEL = "gpt-4.1";
 export const DEFAULT_APPROVAL_MODE = AutoApprovalMode.SUGGEST;
 export const DEFAULT_INSTRUCTIONS = "";
--- a/codex-cli/src/utils/model-info.ts
+++ b/codex-cli/src/utils/model-info.ts
@@ -19,6 +19,10 @@ export const openAiModelInfo = {
    label: "o3 (2025-04-16)",
    maxContextLength: 200000,
  },
  "codex-mini-latest": {
    label: "codex-mini-latest",
    maxContextLength: 200000,
  },
  "o4-mini": {
    label: "o4 Mini",
    maxContextLength: 200000,
--- a/codex-cli/tests/config.test.tsx
+++ b/codex-cli/tests/config.test.tsx
@@ -67,7 +67,7 @@ test("loads default config if files don't exist", () => {
  });
  // Keep the test focused on just checking that default model and instructions are loaded
  // so we need to make sure we check just these properties
-  expect(config.model).toBe("o4-mini");
+  expect(config.model).toBe("codex-mini-latest");
  expect(config.instructions).toBe("");
 });
--- a/codex-cli/tests/disableResponseStorage.agentLoop.test.ts
+++ b/codex-cli/tests/disableResponseStorage.agentLoop.test.ts
@@ -29,7 +29,7 @@ describe.each([
 ])("AgentLoop with disableResponseStorage=%s", ({ flag, title }) => {
  /* build a fresh config for each case */
  const cfg: AppConfig = {
-    model: "o4-mini",
+    model: "codex-mini-latest",
    provider: "openai",
    instructions: "",
    disableResponseStorage: flag,
--- a/codex-cli/tests/disableResponseStorage.test.ts
+++ b/codex-cli/tests/disableResponseStorage.test.ts
@@ -21,7 +21,10 @@ describe("disableResponseStorage persistence", () => {
    mkdirSync(codexDir, { recursive: true });
    // seed YAML with ZDR enabled
-    writeFileSync(yamlPath, "model: o4-mini\ndisableResponseStorage: true\n");
+    writeFileSync(
      yamlPath,
      "model: codex-mini-latest\ndisableResponseStorage: true\n",
    );
  });
  afterAll((): void => {
--- a/codex-cli/tests/model-utils-network-error.test.ts
+++ b/codex-cli/tests/model-utils-network-error.test.ts
@@ -44,7 +44,10 @@ describe("model-utils – offline resilience", () => {
      "../src/utils/model-utils.js"
    );
-    const supported = await isModelSupportedForResponses("openai", "o4-mini");
+    const supported = await isModelSupportedForResponses(
      "openai",
      "codex-mini-latest",
    );
    expect(supported).toBe(true);
  });