From 1a8610cd9ec829ceb98a52157a16401199f9ce68 Mon Sep 17 00:00:00 2001
From: salama-openai
Date: Fri, 18 Apr 2025 22:15:01 -0700
Subject: [PATCH] feat: add flex mode option for cost savings (#372)

Adding an option to turn on flex processing mode to reduce costs when
running the agent. Bumped the openai TypeScript package version to add
the new feature.

---------

Co-authored-by: Thibault Sottiaux
---
 codex-cli/src/cli.tsx                        | 24 +++++++++++++++++++
 .../src/components/chat/terminal-chat.tsx    | 15 ++++++++++--
 .../src/components/chat/terminal-header.tsx  |  9 +++++++
 .../src/components/singlepass-cli-app.tsx    |  1 +
 codex-cli/src/utils/agent/agent-loop.ts      |  1 +
 codex-cli/src/utils/compact-summary.ts       |  9 +++++++
 codex-cli/src/utils/config.ts                |  3 +++
 7 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/codex-cli/src/cli.tsx b/codex-cli/src/cli.tsx
index 2f2b8c09..7c8e3d0d 100644
--- a/codex-cli/src/cli.tsx
+++ b/codex-cli/src/cli.tsx
@@ -71,6 +71,9 @@ const cli = meow(
     --full-stdout              Do not truncate stdout/stderr from command outputs
     --notify                   Enable desktop notifications for responses
 
+    --flex-mode                Use "flex-mode" processing mode for the request (only supported
+                               with models o3 and o4-mini)
+
   Dangerous options
     --dangerously-auto-approve-everything
                                Skip all confirmation prompts and execute commands without
@@ -140,6 +143,11 @@
       type: "string",
       description: "Path to a markdown file to include as project doc",
     },
+    flexMode: {
+      type: "boolean",
+      description:
+        "Enable the flex-mode service tier (only supported by models o3 and o4-mini)",
+    },
     fullStdout: {
       type: "boolean",
       description:
@@ -250,12 +258,28 @@
 config = {
   apiKey,
   ...config,
   model: model ?? config.model,
+  flexMode: Boolean(cli.flags.flexMode),
   notify: Boolean(cli.flags.notify),
 };
 // Check for updates after loading config
 // This is important because we write state file in the config dir
 await checkForUpdates().catch();
+// ---------------------------------------------------------------------------
+// --flex-mode validation (only allowed for o3 and o4-mini)
+// ---------------------------------------------------------------------------
+
+if (cli.flags.flexMode) {
+  const allowedFlexModels = new Set(["o3", "o4-mini"]);
+  if (!allowedFlexModels.has(config.model)) {
+    // eslint-disable-next-line no-console
+    console.error(
+      `The --flex-mode option is only supported when using the 'o3' or 'o4-mini' models. ` +
+        `Current model: '${config.model}'.`,
+    );
+    process.exit(1);
+  }
+}
 
 if (!(await isModelSupportedForResponses(config.model))) {
   // eslint-disable-next-line no-console

diff --git a/codex-cli/src/components/chat/terminal-chat.tsx b/codex-cli/src/components/chat/terminal-chat.tsx
index d2cb64c4..74112da2 100644
--- a/codex-cli/src/components/chat/terminal-chat.tsx
+++ b/codex-cli/src/components/chat/terminal-chat.tsx
@@ -59,6 +59,7 @@ const colorsByPolicy: Record = {
 async function generateCommandExplanation(
   command: Array<string>,
   model: string,
+  flexMode: boolean,
 ): Promise<string> {
   try {
     // Create a temporary OpenAI client
@@ -73,6 +74,7 @@ async function generateCommandExplanation(
     // Create a prompt that asks for an explanation with a more detailed system prompt
     const response = await oai.chat.completions.create({
       model,
+      ...(flexMode ? { service_tier: "flex" } : {}),
{ service_tier: "flex" } : {}), messages: [ { role: "system", @@ -142,7 +144,11 @@ export default function TerminalChat({ const handleCompact = async () => { setLoading(true); try { - const summary = await generateCompactSummary(items, model); + const summary = await generateCompactSummary( + items, + model, + Boolean(config.flexMode), + ); setItems([ { id: `compact-${Date.now()}`, @@ -245,7 +251,11 @@ export default function TerminalChat({ log(`Generating explanation for command: ${commandForDisplay}`); // Generate an explanation using the same model - const explanation = await generateCommandExplanation(command, model); + const explanation = await generateCommandExplanation( + command, + model, + Boolean(config.flexMode), + ); log(`Generated explanation: ${explanation}`); // Ask for confirmation again, but with the explanation @@ -453,6 +463,7 @@ export default function TerminalChat({ colorsByPolicy, agent, initialImagePaths, + flexModeEnabled: Boolean(config.flexMode), }} /> ) : ( diff --git a/codex-cli/src/components/chat/terminal-header.tsx b/codex-cli/src/components/chat/terminal-header.tsx index 3c8e3089..4c0ed2e1 100644 --- a/codex-cli/src/components/chat/terminal-header.tsx +++ b/codex-cli/src/components/chat/terminal-header.tsx @@ -13,6 +13,7 @@ export interface TerminalHeaderProps { colorsByPolicy: Record; agent?: AgentLoop; initialImagePaths?: Array; + flexModeEnabled?: boolean; } const TerminalHeader: React.FC = ({ @@ -24,6 +25,7 @@ const TerminalHeader: React.FC = ({ colorsByPolicy, agent, initialImagePaths, + flexModeEnabled = false, }) => { return ( <> @@ -32,6 +34,7 @@ const TerminalHeader: React.FC = ({ ● Codex v{version} – {PWD} – {model} –{" "} {approvalPolicy} + {flexModeEnabled ? " – flex-mode" : ""} ) : ( <> @@ -68,6 +71,12 @@ const TerminalHeader: React.FC = ({ {approvalPolicy} + {flexModeEnabled && ( + + flex-mode:{" "} + enabled + + )} {initialImagePaths?.map((img, idx) => ( image:{" "} diff --git a/codex-cli/src/components/singlepass-cli-app.tsx b/codex-cli/src/components/singlepass-cli-app.tsx index 63982bf4..5d649424 100644 --- a/codex-cli/src/components/singlepass-cli-app.tsx +++ b/codex-cli/src/components/singlepass-cli-app.tsx @@ -400,6 +400,7 @@ export function SinglePassApp({ }); const chatResp = await openai.beta.chat.completions.parse({ model: config.model, + ...(config.flexMode ? { service_tier: "flex" } : {}), messages: [ { role: "user", diff --git a/codex-cli/src/utils/agent/agent-loop.ts b/codex-cli/src/utils/agent/agent-loop.ts index 67d775f2..044715df 100644 --- a/codex-cli/src/utils/agent/agent-loop.ts +++ b/codex-cli/src/utils/agent/agent-loop.ts @@ -516,6 +516,7 @@ export class AgentLoop { stream: true, parallel_tool_calls: false, reasoning, + ...(this.config.flexMode ? { service_tier: "flex" } : {}), tools: [ { type: "function", diff --git a/codex-cli/src/utils/compact-summary.ts b/codex-cli/src/utils/compact-summary.ts index 81474396..040145da 100644 --- a/codex-cli/src/utils/compact-summary.ts +++ b/codex-cli/src/utils/compact-summary.ts @@ -9,9 +9,17 @@ import OpenAI from "openai"; * @param model The model to use for generating the summary * @returns A concise structured summary string */ +/** + * Generate a condensed summary of the conversation items. 
+ * @param items The list of conversation items to summarize
+ * @param model The model to use for generating the summary
+ * @param flexMode Whether to use the flex-mode service tier
+ * @returns A concise structured summary string
+ */
 export async function generateCompactSummary(
   items: Array<ResponseItem>,
   model: string,
+  flexMode = false,
 ): Promise<string> {
   const oai = new OpenAI({
     apiKey: process.env["OPENAI_API_KEY"],
@@ -44,6 +52,7 @@
 
   const response = await oai.chat.completions.create({
     model,
+    ...(flexMode ? { service_tier: "flex" } : {}),
     messages: [
       {
         role: "assistant",

diff --git a/codex-cli/src/utils/config.ts b/codex-cli/src/utils/config.ts
index 309256e9..be28eebe 100644
--- a/codex-cli/src/utils/config.ts
+++ b/codex-cli/src/utils/config.ts
@@ -79,6 +79,9 @@ export type AppConfig = {
   memory?: MemoryConfig;
   /** Whether to enable desktop notifications for responses */
   notify: boolean;
+
+  /** Enable the "flex-mode" processing mode for supported models (o3, o4-mini) */
+  flexMode?: boolean;
   history?: {
     maxSize: number;
     saveHistory: boolean;
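
A usage sketch of the new flag (a hypothetical invocation: the prompt text is
made up, and pairing with --model assumes the CLI's existing model flag; the
validation added in cli.tsx rejects any model other than o3 or o4-mini):

    codex --model o4-mini --flex-mode "summarize the changes in this repo"

With the flag set, each OpenAI request touched by this patch spreads
service_tier: "flex" into its payload, opting into the lower-cost flex
processing tier at the price of slower, lower-priority processing.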