From 1a8610cd9ec829ceb98a52157a16401199f9ce68 Mon Sep 17 00:00:00 2001
From: salama-openai
Date: Fri, 18 Apr 2025 22:15:01 -0700
Subject: [PATCH] feat: add flex mode option for cost savings (#372)

Adding an option to turn on flex processing mode to reduce costs when
running the agent. Bumped the openai TypeScript package version to add
the new feature.

---------

Co-authored-by: Thibault Sottiaux
---
 codex-cli/src/cli.tsx                        | 24 +++++++++++++++++++
 .../src/components/chat/terminal-chat.tsx    | 15 ++++++++++--
 .../src/components/chat/terminal-header.tsx  |  9 +++++++
 .../src/components/singlepass-cli-app.tsx    |  1 +
 codex-cli/src/utils/agent/agent-loop.ts      |  1 +
 codex-cli/src/utils/compact-summary.ts       |  9 +++++++
 codex-cli/src/utils/config.ts                |  3 +++
 7 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/codex-cli/src/cli.tsx b/codex-cli/src/cli.tsx
index 2f2b8c09..7c8e3d0d 100644
--- a/codex-cli/src/cli.tsx
+++ b/codex-cli/src/cli.tsx
@@ -71,6 +71,9 @@ const cli = meow(
     --full-stdout              Do not truncate stdout/stderr from command outputs
     --notify                   Enable desktop notifications for responses
 
+    --flex-mode                Use "flex-mode" processing mode for the request (only supported
+                               with models o3 and o4-mini)
+
   Dangerous options
     --dangerously-auto-approve-everything
                                Skip all confirmation prompts and execute commands without
@@ -140,6 +143,11 @@
       type: "string",
       description: "Path to a markdown file to include as project doc",
     },
+    flexMode: {
+      type: "boolean",
+      description:
+        "Enable the flex-mode service tier (only supported by models o3 and o4-mini)",
+    },
     fullStdout: {
       type: "boolean",
       description:
@@ -250,12 +258,28 @@
 config = {
   apiKey,
   ...config,
   model: model ?? config.model,
+  flexMode: Boolean(cli.flags.flexMode),
   notify: Boolean(cli.flags.notify),
 };
 // Check for updates after loading config
 // This is important because we write state file in the config dir
 await checkForUpdates().catch();
+// ---------------------------------------------------------------------------
+// --flex-mode validation (only allowed for o3 and o4-mini)
+// ---------------------------------------------------------------------------
+
+if (cli.flags.flexMode) {
+  const allowedFlexModels = new Set(["o3", "o4-mini"]);
+  if (!allowedFlexModels.has(config.model)) {
+    // eslint-disable-next-line no-console
+    console.error(
+      `The --flex-mode option is only supported when using the 'o3' or 'o4-mini' models. ` +
+        `Current model: '${config.model}'.`,
+    );
+    process.exit(1);
+  }
+}
 
 if (!(await isModelSupportedForResponses(config.model))) {
   // eslint-disable-next-line no-console

diff --git a/codex-cli/src/components/chat/terminal-chat.tsx b/codex-cli/src/components/chat/terminal-chat.tsx
index d2cb64c4..74112da2 100644
--- a/codex-cli/src/components/chat/terminal-chat.tsx
+++ b/codex-cli/src/components/chat/terminal-chat.tsx
@@ -59,6 +59,7 @@ const colorsByPolicy: Record = {
 async function generateCommandExplanation(
   command: Array<string>,
   model: string,
+  flexMode: boolean,
 ): Promise<string> {
   try {
     // Create a temporary OpenAI client
@@ -73,6 +74,7 @@ async function generateCommandExplanation(
     // Create a prompt that asks for an explanation with a more detailed system prompt
     const response = await oai.chat.completions.create({
       model,
+      ...(flexMode ? { service_tier: "flex" } : {}),
{ service_tier: "flex" } : {}), messages: [ { role: "system", @@ -142,7 +144,11 @@ export default function TerminalChat({ const handleCompact = async () => { setLoading(true); try { - const summary = await generateCompactSummary(items, model); + const summary = await generateCompactSummary( + items, + model, + Boolean(config.flexMode), + ); setItems([ { id: `compact-${Date.now()}`, @@ -245,7 +251,11 @@ export default function TerminalChat({ log(`Generating explanation for command: ${commandForDisplay}`); // Generate an explanation using the same model - const explanation = await generateCommandExplanation(command, model); + const explanation = await generateCommandExplanation( + command, + model, + Boolean(config.flexMode), + ); log(`Generated explanation: ${explanation}`); // Ask for confirmation again, but with the explanation @@ -453,6 +463,7 @@ export default function TerminalChat({ colorsByPolicy, agent, initialImagePaths, + flexModeEnabled: Boolean(config.flexMode), }} /> ) : ( diff --git a/codex-cli/src/components/chat/terminal-header.tsx b/codex-cli/src/components/chat/terminal-header.tsx index 3c8e3089..4c0ed2e1 100644 --- a/codex-cli/src/components/chat/terminal-header.tsx +++ b/codex-cli/src/components/chat/terminal-header.tsx @@ -13,6 +13,7 @@ export interface TerminalHeaderProps { colorsByPolicy: Record; agent?: AgentLoop; initialImagePaths?: Array; + flexModeEnabled?: boolean; } const TerminalHeader: React.FC = ({ @@ -24,6 +25,7 @@ const TerminalHeader: React.FC = ({ colorsByPolicy, agent, initialImagePaths, + flexModeEnabled = false, }) => { return ( <> @@ -32,6 +34,7 @@ const TerminalHeader: React.FC = ({ ● Codex v{version} – {PWD} – {model} –{" "} {approvalPolicy} + {flexModeEnabled ? " – flex-mode" : ""} ) : ( <> @@ -68,6 +71,12 @@ const TerminalHeader: React.FC = ({ {approvalPolicy} + {flexModeEnabled && ( + + flex-mode:{" "} + enabled + + )} {initialImagePaths?.map((img, idx) => ( image:{" "} diff --git a/codex-cli/src/components/singlepass-cli-app.tsx b/codex-cli/src/components/singlepass-cli-app.tsx index 63982bf4..5d649424 100644 --- a/codex-cli/src/components/singlepass-cli-app.tsx +++ b/codex-cli/src/components/singlepass-cli-app.tsx @@ -400,6 +400,7 @@ export function SinglePassApp({ }); const chatResp = await openai.beta.chat.completions.parse({ model: config.model, + ...(config.flexMode ? { service_tier: "flex" } : {}), messages: [ { role: "user", diff --git a/codex-cli/src/utils/agent/agent-loop.ts b/codex-cli/src/utils/agent/agent-loop.ts index 67d775f2..044715df 100644 --- a/codex-cli/src/utils/agent/agent-loop.ts +++ b/codex-cli/src/utils/agent/agent-loop.ts @@ -516,6 +516,7 @@ export class AgentLoop { stream: true, parallel_tool_calls: false, reasoning, + ...(this.config.flexMode ? { service_tier: "flex" } : {}), tools: [ { type: "function", diff --git a/codex-cli/src/utils/compact-summary.ts b/codex-cli/src/utils/compact-summary.ts index 81474396..040145da 100644 --- a/codex-cli/src/utils/compact-summary.ts +++ b/codex-cli/src/utils/compact-summary.ts @@ -9,9 +9,17 @@ import OpenAI from "openai"; * @param model The model to use for generating the summary * @returns A concise structured summary string */ +/** + * Generate a condensed summary of the conversation items. 
+ * @param items The list of conversation items to summarize
+ * @param model The model to use for generating the summary
+ * @param flexMode Whether to use the flex-mode service tier
+ * @returns A concise structured summary string
+ */
 export async function generateCompactSummary(
   items: Array<ResponseItem>,
   model: string,
+  flexMode = false,
 ): Promise<string> {
   const oai = new OpenAI({
     apiKey: process.env["OPENAI_API_KEY"],
@@ -44,6 +52,7 @@
 
   const response = await oai.chat.completions.create({
     model,
+    ...(flexMode ? { service_tier: "flex" } : {}),
     messages: [
       {
         role: "assistant",

diff --git a/codex-cli/src/utils/config.ts b/codex-cli/src/utils/config.ts
index 309256e9..be28eebe 100644
--- a/codex-cli/src/utils/config.ts
+++ b/codex-cli/src/utils/config.ts
@@ -79,6 +79,9 @@ export type AppConfig = {
   memory?: MemoryConfig;
   /** Whether to enable desktop notifications for responses */
   notify: boolean;
+
+  /** Enable the "flex-mode" processing mode for supported models (o3, o4-mini) */
+  flexMode?: boolean;
   history?: {
     maxSize: number;
     saveHistory: boolean;
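
A usage sketch of the new flag (a hypothetical invocation: the prompt text is
made up, and pairing with --model assumes the CLI's existing model flag; the
validation added in cli.tsx rejects any model other than o3 or o4-mini):

    codex --model o4-mini --flex-mode "summarize the changes in this repo"

With the flag set, each OpenAI request touched by this patch spreads
service_tier: "flex" into its payload, opting into the lower-cost flex
processing tier at the price of slower, lower-priority processing.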