add: codex-mini-latest (#951)

💽

---------

Co-authored-by: Trevor Creech <tcreech@openai.com>
Author: Fouad Matin (committed by GitHub)
Date: 2025-05-16 08:04:00 -07:00
Parent: ce2ecbe72f
Commit: 070499f534

9 changed files with 141 additions and 20 deletions

View File

@@ -56,7 +56,7 @@ const cli = meow(
     --version                  Print version and exit
     -h, --help                 Show usage and exit
-    -m, --model <model>        Model to use for completions (default: o4-mini)
+    -m, --model <model>        Model to use for completions (default: codex-mini-latest)
     -p, --provider <provider>  Provider to use for completions (default: openai)
     -i, --image <path>         Path(s) to image files to include as input
     -v, --view <rollout>       Inspect a previously saved rollout instead of starting a session

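For orientation: a minimal sketch of how the parsed flag and the new default are assumed to combine. This wiring is not shown in the diff; `cli` is the meow result from the hunk above, and the import path is illustrative.

```ts
import { DEFAULT_AGENTIC_MODEL } from "./utils/config";

// Assumed wiring: fall back to the default model when -m/--model is absent.
// This commit changes that default from "o4-mini" to "codex-mini-latest".
const model: string = cli.flags.model ?? DEFAULT_AGENTIC_MODEL;
```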
View File

@@ -19,6 +19,7 @@ import { parse, setOptions } from "marked";
 import TerminalRenderer from "marked-terminal";
 import path from "path";
 import React, { useEffect, useMemo } from "react";
+import { formatCommandForDisplay } from "src/format-command.js";
 import supportsHyperlinks from "supports-hyperlinks";
 
 export default function TerminalChatResponseItem({
@@ -41,8 +42,12 @@ export default function TerminalChatResponseItem({
           fileOpener={fileOpener}
         />
       );
+    // @ts-expect-error new item types aren't in SDK yet
+    case "local_shell_call":
     case "function_call":
       return <TerminalChatResponseToolCall message={item} />;
+    // @ts-expect-error new item types aren't in SDK yet
+    case "local_shell_call_output":
     case "function_call_output":
       return (
         <TerminalChatResponseToolCallOutput
@@ -166,21 +171,28 @@ function TerminalChatResponseMessage({
 function TerminalChatResponseToolCall({
   message,
 }: {
-  message: ResponseFunctionToolCallItem;
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  message: ResponseFunctionToolCallItem | any;
 }) {
-  const details = parseToolCall(message);
+  let workdir: string | undefined;
+  let cmdReadableText: string | undefined;
+  if (message.type === "function_call") {
+    const details = parseToolCall(message);
+    workdir = details?.workdir;
+    cmdReadableText = details?.cmdReadableText;
+  } else if (message.type === "local_shell_call") {
+    const action = message.action;
+    workdir = action.working_directory;
+    cmdReadableText = formatCommandForDisplay(action.command);
+  }
   return (
     <Box flexDirection="column" gap={1}>
       <Text color="magentaBright" bold>
         command
-        {details?.workdir ? (
-          <Text dimColor>{` (${details?.workdir})`}</Text>
-        ) : (
-          ""
-        )}
+        {workdir ? <Text dimColor>{` (${workdir})`}</Text> : ""}
       </Text>
       <Text>
-        <Text dimColor>$</Text> {details?.cmdReadableText}
+        <Text dimColor>$</Text> {cmdReadableText}
       </Text>
     </Box>
   );
@@ -190,7 +202,8 @@ function TerminalChatResponseToolCallOutput({
   message,
   fullStdout,
 }: {
-  message: ResponseFunctionToolCallOutputItem;
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  message: ResponseFunctionToolCallOutputItem | any;
   fullStdout: boolean;
 }) {
   const { output, metadata } = parseToolCallOutput(message.output);

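For reference, a sketch of the two item shapes this component now distinguishes. They are not yet in the SDK (hence the `any` escape hatches), and the field names below are reconstructed from the property accesses in the diff:

```ts
// Reconstructed shapes; not official SDK types.
type LocalShellCall = {
  type: "local_shell_call";
  call_id: string;
  action: {
    type: "exec";
    command: Array<string>; // argv, rendered via formatCommandForDisplay
    working_directory?: string;
    timeout_ms?: number;
  };
};

type LocalShellCallOutput = {
  type: "local_shell_call_output";
  call_id: string;
  output: string; // JSON string of { output, metadata }, parsed by parseToolCallOutput
};
```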
View File

@@ -8,6 +8,7 @@ import type {
   ResponseItem,
   ResponseCreateParams,
   FunctionTool,
+  Tool,
 } from "openai/resources/responses/responses.mjs";
 import type { Reasoning } from "openai/resources.mjs";
@@ -84,7 +85,7 @@ type AgentLoopParams = {
   onLastResponseId: (lastResponseId: string) => void;
 };
 
-const shellTool: FunctionTool = {
+const shellFunctionTool: FunctionTool = {
   type: "function",
   name: "shell",
   description: "Runs a shell command, and returns its output.",
@@ -108,6 +109,11 @@ const shellTool: FunctionTool = {
   },
 };
 
+const localShellTool: Tool = {
+  // @ts-expect-error - waiting on sdk
+  type: "local_shell",
+};
+
 export class AgentLoop {
   private model: string;
   private provider: string;
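The practical difference between the two tool declarations: `shell` is a plain function tool, so the model fills in a free-form JSON `arguments` string that the CLI must parse, while `local_shell` is a built-in tool type that the codex models emit as structured `local_shell_call` items, which is why it carries no parameter schema. A hedged sketch of the two request shapes:

```ts
// Function tool: replies arrive as `function_call` items with a JSON
// `arguments` string to parse before executing anything.
const withFunctionTool = { tools: [shellFunctionTool], tool_choice: "auto" };

// Built-in tool: codex models reply with `local_shell_call` items that
// already carry argv, working directory, and timeout as typed fields.
const withLocalShell = { tools: [localShellTool], tool_choice: "auto" };
```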
@@ -461,6 +467,73 @@ export class AgentLoop {
     return [outputItem, ...additionalItems];
   }
 
+  private async handleLocalShellCall(
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    item: any,
+  ): Promise<Array<ResponseInputItem>> {
+    // If the agent has been canceled in the meantime we should not perform
+    // any additional work. Returning an empty array ensures that we neither
+    // execute the requested tool call nor enqueue any follow-up input items.
+    // This keeps the cancellation semantics intuitive: once the user
+    // interrupts a task, no further actions related to that task should be
+    // taken.
+    if (this.canceled) {
+      return [];
+    }
+
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    const outputItem: any = {
+      type: "local_shell_call_output",
+      // `call_id` is mandatory; ensure we never send `undefined`, which
+      // would trigger the "No tool output found…" 400 from the API.
+      call_id: item.call_id,
+      output: "no function found",
+    };
+
+    // We intentionally *do not* remove this `callId` from the
+    // `pendingAborts` set right away. The output produced below is only
+    // queued up for the *next* request to the OpenAI API; it has not been
+    // delivered yet. If the user presses ESC-ESC (i.e. invokes `cancel()`)
+    // in the small window between queuing the result and the actual network
+    // call, we need to be able to surface a synthetic
+    // `function_call_output` marked as "aborted". Keeping the ID in the set
+    // until the run concludes successfully lets the next `run()`
+    // differentiate between an aborted tool call (needs the synthetic
+    // output) and a completed one (cleared below in the `flush()` helper).
+
+    // used to tell model to stop if needed
+    const additionalItems: Array<ResponseInputItem> = [];
+
+    if (item.action.type !== "exec") {
+      throw new Error("Invalid action type");
+    }
+
+    const args = {
+      cmd: item.action.command,
+      workdir: item.action.working_directory,
+      timeoutInMillis: item.action.timeout_ms,
+    };
+    const {
+      outputText,
+      metadata,
+      additionalItems: additionalItemsFromExec,
+    } = await handleExecCommand(
+      args,
+      this.config,
+      this.approvalPolicy,
+      this.additionalWritableRoots,
+      this.getCommandConfirmation,
+      this.execAbortController?.signal,
+    );
+    outputItem.output = JSON.stringify({ output: outputText, metadata });
+
+    if (additionalItemsFromExec) {
+      additionalItems.push(...additionalItemsFromExec);
+    }
+
+    return [outputItem, ...additionalItems];
+  }
+
   public async run(
     input: Array<ResponseInputItem>,
     previousResponseId: string = "",
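To make the mapping concrete, a hypothetical `local_shell_call` item and what `handleLocalShellCall` derives from it. The values are invented for illustration; the field mapping mirrors the code above.

```ts
const incoming = {
  type: "local_shell_call",
  call_id: "call_123", // echoed back on the output item
  action: {
    type: "exec",
    command: ["rg", "--files", "src"],
    working_directory: "/workspace/app",
    timeout_ms: 10_000,
  },
};

// Passed to handleExecCommand:
//   { cmd: ["rg", "--files", "src"], workdir: "/workspace/app", timeoutInMillis: 10000 }
//
// Queued for the next API request once the command finishes:
//   { type: "local_shell_call_output", call_id: "call_123",
//     output: JSON.stringify({ output: outputText, metadata }) }
```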
@@ -545,6 +618,11 @@ export class AgentLoop {
       // `disableResponseStorage === true`.
       let transcriptPrefixLen = 0;
 
+      let tools: Array<Tool> = [shellFunctionTool];
+      if (this.model.startsWith("codex")) {
+        tools = [localShellTool];
+      }
+
       const stripInternalFields = (
         item: ResponseInputItem,
       ): ResponseInputItem => {
@@ -648,6 +726,8 @@ export class AgentLoop {
           if (
             (item as ResponseInputItem).type === "function_call" ||
             (item as ResponseInputItem).type === "reasoning" ||
+            // @ts-expect-error - waiting on sdk
+            (item as ResponseInputItem).type === "local_shell_call" ||
             ((item as ResponseInputItem).type === "message" &&
               // eslint-disable-next-line @typescript-eslint/no-explicit-any
               (item as any).role === "user")
@@ -748,7 +828,7 @@ export class AgentLoop {
             store: true,
             previous_response_id: lastResponseId || undefined,
           }),
-          tools: [shellTool],
+          tools: tools,
           // Explicitly tell the model it is allowed to pick whatever
           // tool it deems appropriate. Omitting this sometimes leads to
           // the model ignoring the available tools and responding with
@@ -968,7 +1048,10 @@ export class AgentLoop {
             if (maybeReasoning.type === "reasoning") {
               maybeReasoning.duration_ms = Date.now() - thinkingStart;
             }
-            if (item.type === "function_call") {
+            if (
+              item.type === "function_call" ||
+              item.type === "local_shell_call"
+            ) {
               // Track outstanding tool call so we can abort later if needed.
               // The item comes from the streaming response, therefore it has
               // either `id` (chat) or `call_id` (responses); we normalise
@@ -1091,7 +1174,11 @@ export class AgentLoop {
     let reasoning: Reasoning | undefined;
     if (this.model.startsWith("o")) {
       reasoning = { effort: "high" };
-      if (this.model === "o3" || this.model === "o4-mini") {
+      if (
+        this.model === "o3" ||
+        this.model === "o4-mini" ||
+        this.model === "codex-mini-latest"
+      ) {
         reasoning.summary = "auto";
       }
     }
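Isolated from the class, the reasoning-parameter selection reduces to roughly the following sketch. Note that the enclosing `startsWith("o")` guard is unchanged by this hunk, and "codex-mini-latest" does not start with "o", so the new comparison only fires where that outer check also passes:

```ts
import type { Reasoning } from "openai/resources.mjs";

// Sketch of the logic above, not the actual extracted function.
function reasoningFor(model: string): Reasoning | undefined {
  if (!model.startsWith("o")) {
    return undefined;
  }
  const reasoning: Reasoning = { effort: "high" };
  if (model === "o3" || model === "o4-mini" || model === "codex-mini-latest") {
    reasoning.summary = "auto";
  }
  return reasoning;
}
```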
@@ -1130,7 +1217,7 @@ export class AgentLoop {
         store: true,
         previous_response_id: lastResponseId || undefined,
       }),
-      tools: [shellTool],
+      tools: tools,
       tool_choice: "auto",
     });
@@ -1492,6 +1579,17 @@ export class AgentLoop {
           // eslint-disable-next-line no-await-in-loop
           const result = await this.handleFunctionCall(item);
           turnInput.push(...result);
+          // @ts-expect-error - waiting on sdk
+        } else if (item.type === "local_shell_call") {
+          // @ts-expect-error - waiting on sdk
+          if (alreadyProcessedResponses.has(item.id)) {
+            continue;
+          }
+          // @ts-expect-error - waiting on sdk
+          alreadyProcessedResponses.add(item.id);
+          // eslint-disable-next-line no-await-in-loop
+          const result = await this.handleLocalShellCall(item);
+          turnInput.push(...result);
         }
         emitItem(item as ResponseItem);
       }

View File

@@ -43,7 +43,7 @@ if (!isVitest) {
   loadDotenv({ path: USER_WIDE_CONFIG_PATH });
 }
 
-export const DEFAULT_AGENTIC_MODEL = "o4-mini";
+export const DEFAULT_AGENTIC_MODEL = "codex-mini-latest";
 export const DEFAULT_FULL_CONTEXT_MODEL = "gpt-4.1";
 export const DEFAULT_APPROVAL_MODE = AutoApprovalMode.SUGGEST;
 export const DEFAULT_INSTRUCTIONS = "";

View File

@@ -19,6 +19,10 @@ export const openAiModelInfo = {
     label: "o3 (2025-04-16)",
     maxContextLength: 200000,
   },
+  "codex-mini-latest": {
+    label: "codex-mini-latest",
+    maxContextLength: 200000,
+  },
   "o4-mini": {
     label: "o4 Mini",
     maxContextLength: 200000,

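A minimal sketch of how a registry entry like this is typically consumed; the helper and its fallback value are hypothetical, only the entry shape comes from the diff:

```ts
type ModelInfo = { label: string; maxContextLength: number };

// Hypothetical lookup with a conservative fallback for unknown models.
function maxContextLengthFor(
  info: Record<string, ModelInfo>,
  model: string,
): number {
  return info[model]?.maxContextLength ?? 8192;
}

// e.g. maxContextLengthFor(openAiModelInfo, "codex-mini-latest") === 200000
```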
View File

@@ -67,7 +67,7 @@ test("loads default config if files don't exist", () => {
   });
 
   // Keep the test focused on just checking that default model and instructions are loaded
   // so we need to make sure we check just these properties
-  expect(config.model).toBe("o4-mini");
+  expect(config.model).toBe("codex-mini-latest");
   expect(config.instructions).toBe("");
 });

View File

@@ -29,7 +29,7 @@ describe.each([
 ])("AgentLoop with disableResponseStorage=%s", ({ flag, title }) => {
   /* build a fresh config for each case */
   const cfg: AppConfig = {
-    model: "o4-mini",
+    model: "codex-mini-latest",
     provider: "openai",
     instructions: "",
     disableResponseStorage: flag,

View File

@@ -21,7 +21,10 @@ describe("disableResponseStorage persistence", () => {
   mkdirSync(codexDir, { recursive: true });
 
   // seed YAML with ZDR enabled
-  writeFileSync(yamlPath, "model: o4-mini\ndisableResponseStorage: true\n");
+  writeFileSync(
+    yamlPath,
+    "model: codex-mini-latest\ndisableResponseStorage: true\n",
+  );
 });
 
 afterAll((): void => {

View File

@@ -44,7 +44,10 @@ describe("model-utils offline resilience", () => {
     "../src/utils/model-utils.js"
   );
-  const supported = await isModelSupportedForResponses("openai", "o4-mini");
+  const supported = await isModelSupportedForResponses(
+    "openai",
+    "codex-mini-latest",
+  );
   expect(supported).toBe(true);
 });