diff --git a/codex-cli/src/cli.tsx b/codex-cli/src/cli.tsx index d53ed07f..c009bb8a 100644 --- a/codex-cli/src/cli.tsx +++ b/codex-cli/src/cli.tsx @@ -56,7 +56,7 @@ const cli = meow( --version Print version and exit -h, --help Show usage and exit - -m, --model Model to use for completions (default: o4-mini) + -m, --model Model to use for completions (default: codex-mini-latest) -p, --provider Provider to use for completions (default: openai) -i, --image Path(s) to image files to include as input -v, --view Inspect a previously saved rollout instead of starting a session diff --git a/codex-cli/src/components/chat/terminal-chat-response-item.tsx b/codex-cli/src/components/chat/terminal-chat-response-item.tsx index bda4fea9..bab4aa31 100644 --- a/codex-cli/src/components/chat/terminal-chat-response-item.tsx +++ b/codex-cli/src/components/chat/terminal-chat-response-item.tsx @@ -19,6 +19,7 @@ import { parse, setOptions } from "marked"; import TerminalRenderer from "marked-terminal"; import path from "path"; import React, { useEffect, useMemo } from "react"; +import { formatCommandForDisplay } from "src/format-command.js"; import supportsHyperlinks from "supports-hyperlinks"; export default function TerminalChatResponseItem({ @@ -41,8 +42,12 @@ export default function TerminalChatResponseItem({ fileOpener={fileOpener} /> ); + // @ts-expect-error new item types aren't in SDK yet + case "local_shell_call": case "function_call": return ; + // @ts-expect-error new item types aren't in SDK yet + case "local_shell_call_output": case "function_call_output": return ( command - {details?.workdir ? ( - {` (${details?.workdir})`} - ) : ( - "" - )} + {workdir ? {` (${workdir})`} : ""} - $ {details?.cmdReadableText} + $ {cmdReadableText} ); @@ -190,7 +202,8 @@ function TerminalChatResponseToolCallOutput({ message, fullStdout, }: { - message: ResponseFunctionToolCallOutputItem; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + message: ResponseFunctionToolCallOutputItem | any; fullStdout: boolean; }) { const { output, metadata } = parseToolCallOutput(message.output); diff --git a/codex-cli/src/utils/agent/agent-loop.ts b/codex-cli/src/utils/agent/agent-loop.ts index 6f04401d..9198e7fd 100644 --- a/codex-cli/src/utils/agent/agent-loop.ts +++ b/codex-cli/src/utils/agent/agent-loop.ts @@ -8,6 +8,7 @@ import type { ResponseItem, ResponseCreateParams, FunctionTool, + Tool, } from "openai/resources/responses/responses.mjs"; import type { Reasoning } from "openai/resources.mjs"; @@ -84,7 +85,7 @@ type AgentLoopParams = { onLastResponseId: (lastResponseId: string) => void; }; -const shellTool: FunctionTool = { +const shellFunctionTool: FunctionTool = { type: "function", name: "shell", description: "Runs a shell command, and returns its output.", @@ -108,6 +109,11 @@ const shellTool: FunctionTool = { }, }; +const localShellTool: Tool = { + //@ts-expect-error - waiting on sdk + type: "local_shell", +}; + export class AgentLoop { private model: string; private provider: string; @@ -461,6 +467,73 @@ export class AgentLoop { return [outputItem, ...additionalItems]; } + private async handleLocalShellCall( + // eslint-disable-next-line @typescript-eslint/no-explicit-any + item: any, + ): Promise> { + // If the agent has been canceled in the meantime we should not perform any + // additional work. Returning an empty array ensures that we neither execute + // the requested tool call nor enqueue any follow‑up input items. This keeps + // the cancellation semantics intuitive for users – once they interrupt a + // task no further actions related to that task should be taken. + if (this.canceled) { + return []; + } + + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const outputItem: any = { + type: "local_shell_call_output", + // `call_id` is mandatory – ensure we never send `undefined` which would + // trigger the "No tool output found…" 400 from the API. + call_id: item.call_id, + output: "no function found", + }; + + // We intentionally *do not* remove this `callId` from the `pendingAborts` + // set right away. The output produced below is only queued up for the + // *next* request to the OpenAI API – it has not been delivered yet. If + // the user presses ESC‑ESC (i.e. invokes `cancel()`) in the small window + // between queuing the result and the actual network call, we need to be + // able to surface a synthetic `function_call_output` marked as + // "aborted". Keeping the ID in the set until the run concludes + // successfully lets the next `run()` differentiate between an aborted + // tool call (needs the synthetic output) and a completed one (cleared + // below in the `flush()` helper). + + // used to tell model to stop if needed + const additionalItems: Array = []; + + if (item.action.type !== "exec") { + throw new Error("Invalid action type"); + } + + const args = { + cmd: item.action.command, + workdir: item.action.working_directory, + timeoutInMillis: item.action.timeout_ms, + }; + + const { + outputText, + metadata, + additionalItems: additionalItemsFromExec, + } = await handleExecCommand( + args, + this.config, + this.approvalPolicy, + this.additionalWritableRoots, + this.getCommandConfirmation, + this.execAbortController?.signal, + ); + outputItem.output = JSON.stringify({ output: outputText, metadata }); + + if (additionalItemsFromExec) { + additionalItems.push(...additionalItemsFromExec); + } + + return [outputItem, ...additionalItems]; + } + public async run( input: Array, previousResponseId: string = "", @@ -545,6 +618,11 @@ export class AgentLoop { // `disableResponseStorage === true`. let transcriptPrefixLen = 0; + let tools: Array = [shellFunctionTool]; + if (this.model.startsWith("codex")) { + tools = [localShellTool]; + } + const stripInternalFields = ( item: ResponseInputItem, ): ResponseInputItem => { @@ -648,6 +726,8 @@ export class AgentLoop { if ( (item as ResponseInputItem).type === "function_call" || (item as ResponseInputItem).type === "reasoning" || + //@ts-expect-error - waiting on sdk + (item as ResponseInputItem).type === "local_shell_call" || ((item as ResponseInputItem).type === "message" && // eslint-disable-next-line @typescript-eslint/no-explicit-any (item as any).role === "user") @@ -748,7 +828,7 @@ export class AgentLoop { store: true, previous_response_id: lastResponseId || undefined, }), - tools: [shellTool], + tools: tools, // Explicitly tell the model it is allowed to pick whatever // tool it deems appropriate. Omitting this sometimes leads to // the model ignoring the available tools and responding with @@ -968,7 +1048,10 @@ export class AgentLoop { if (maybeReasoning.type === "reasoning") { maybeReasoning.duration_ms = Date.now() - thinkingStart; } - if (item.type === "function_call") { + if ( + item.type === "function_call" || + item.type === "local_shell_call" + ) { // Track outstanding tool call so we can abort later if needed. // The item comes from the streaming response, therefore it has // either `id` (chat) or `call_id` (responses) – we normalise @@ -1091,7 +1174,11 @@ export class AgentLoop { let reasoning: Reasoning | undefined; if (this.model.startsWith("o")) { reasoning = { effort: "high" }; - if (this.model === "o3" || this.model === "o4-mini") { + if ( + this.model === "o3" || + this.model === "o4-mini" || + this.model === "codex-mini-latest" + ) { reasoning.summary = "auto"; } } @@ -1130,7 +1217,7 @@ export class AgentLoop { store: true, previous_response_id: lastResponseId || undefined, }), - tools: [shellTool], + tools: tools, tool_choice: "auto", }); @@ -1492,6 +1579,17 @@ export class AgentLoop { // eslint-disable-next-line no-await-in-loop const result = await this.handleFunctionCall(item); turnInput.push(...result); + //@ts-expect-error - waiting on sdk + } else if (item.type === "local_shell_call") { + //@ts-expect-error - waiting on sdk + if (alreadyProcessedResponses.has(item.id)) { + continue; + } + //@ts-expect-error - waiting on sdk + alreadyProcessedResponses.add(item.id); + // eslint-disable-next-line no-await-in-loop + const result = await this.handleLocalShellCall(item); + turnInput.push(...result); } emitItem(item as ResponseItem); } diff --git a/codex-cli/src/utils/config.ts b/codex-cli/src/utils/config.ts index aba99b13..95183937 100644 --- a/codex-cli/src/utils/config.ts +++ b/codex-cli/src/utils/config.ts @@ -43,7 +43,7 @@ if (!isVitest) { loadDotenv({ path: USER_WIDE_CONFIG_PATH }); } -export const DEFAULT_AGENTIC_MODEL = "o4-mini"; +export const DEFAULT_AGENTIC_MODEL = "codex-mini-latest"; export const DEFAULT_FULL_CONTEXT_MODEL = "gpt-4.1"; export const DEFAULT_APPROVAL_MODE = AutoApprovalMode.SUGGEST; export const DEFAULT_INSTRUCTIONS = ""; diff --git a/codex-cli/src/utils/model-info.ts b/codex-cli/src/utils/model-info.ts index bbe0cb36..50c899d0 100644 --- a/codex-cli/src/utils/model-info.ts +++ b/codex-cli/src/utils/model-info.ts @@ -19,6 +19,10 @@ export const openAiModelInfo = { label: "o3 (2025-04-16)", maxContextLength: 200000, }, + "codex-mini-latest": { + label: "codex-mini-latest", + maxContextLength: 200000, + }, "o4-mini": { label: "o4 Mini", maxContextLength: 200000, diff --git a/codex-cli/tests/config.test.tsx b/codex-cli/tests/config.test.tsx index 05703e7e..55c2297f 100644 --- a/codex-cli/tests/config.test.tsx +++ b/codex-cli/tests/config.test.tsx @@ -67,7 +67,7 @@ test("loads default config if files don't exist", () => { }); // Keep the test focused on just checking that default model and instructions are loaded // so we need to make sure we check just these properties - expect(config.model).toBe("o4-mini"); + expect(config.model).toBe("codex-mini-latest"); expect(config.instructions).toBe(""); }); diff --git a/codex-cli/tests/disableResponseStorage.agentLoop.test.ts b/codex-cli/tests/disableResponseStorage.agentLoop.test.ts index b891e89a..7305ff98 100644 --- a/codex-cli/tests/disableResponseStorage.agentLoop.test.ts +++ b/codex-cli/tests/disableResponseStorage.agentLoop.test.ts @@ -29,7 +29,7 @@ describe.each([ ])("AgentLoop with disableResponseStorage=%s", ({ flag, title }) => { /* build a fresh config for each case */ const cfg: AppConfig = { - model: "o4-mini", + model: "codex-mini-latest", provider: "openai", instructions: "", disableResponseStorage: flag, diff --git a/codex-cli/tests/disableResponseStorage.test.ts b/codex-cli/tests/disableResponseStorage.test.ts index 83c22450..e16fb447 100644 --- a/codex-cli/tests/disableResponseStorage.test.ts +++ b/codex-cli/tests/disableResponseStorage.test.ts @@ -21,7 +21,10 @@ describe("disableResponseStorage persistence", () => { mkdirSync(codexDir, { recursive: true }); // seed YAML with ZDR enabled - writeFileSync(yamlPath, "model: o4-mini\ndisableResponseStorage: true\n"); + writeFileSync( + yamlPath, + "model: codex-mini-latest\ndisableResponseStorage: true\n", + ); }); afterAll((): void => { diff --git a/codex-cli/tests/model-utils-network-error.test.ts b/codex-cli/tests/model-utils-network-error.test.ts index 537e7fdb..9e2718ba 100644 --- a/codex-cli/tests/model-utils-network-error.test.ts +++ b/codex-cli/tests/model-utils-network-error.test.ts @@ -44,7 +44,10 @@ describe("model-utils – offline resilience", () => { "../src/utils/model-utils.js" ); - const supported = await isModelSupportedForResponses("openai", "o4-mini"); + const supported = await isModelSupportedForResponses( + "openai", + "codex-mini-latest", + ); expect(supported).toBe(true); });