feat: add flex mode option for cost savings (#372)

Add an option to enable flex processing mode to reduce costs when
running the agent.

Bumped the openai TypeScript package to a version that supports the new feature.

---------

Co-authored-by: Thibault Sottiaux <tibo@openai.com>
Author: salama-openai
Date: 2025-04-18 22:15:01 -07:00
Committed by: GitHub
Parent: 6f2271e8cd
Commit: 1a8610cd9e
7 changed files with 60 additions and 2 deletions
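
At the API level the change is small: when the flag is set, each OpenAI
request gains a service_tier: "flex" parameter, which the hunks below
thread through every call site. A minimal standalone sketch of the same
pattern (the client setup and prompt are illustrative, not part of this
commit):

    import OpenAI from "openai";

    // Sketch: opt a single chat completion into the flex service tier.
    // Flex processing trades latency for lower cost and is only accepted
    // by supported models (o3 and o4-mini at the time of this commit).
    const oai = new OpenAI({ apiKey: process.env["OPENAI_API_KEY"] });

    const flexMode = true; // e.g. derived from the --flex-mode CLI flag
    const response = await oai.chat.completions.create({
      model: "o4-mini",
      ...(flexMode ? { service_tier: "flex" } : {}),
      messages: [{ role: "user", content: "Summarize this repository." }],
    });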

View File

@@ -71,6 +71,9 @@ const cli = meow(
--full-stdout Do not truncate stdout/stderr from command outputs
--notify Enable desktop notifications for responses
--flex-mode Use "flex-mode" processing mode for the request (only supported
with models o3 and o4-mini)
Dangerous options
--dangerously-auto-approve-everything
Skip all confirmation prompts and execute commands without
@@ -140,6 +143,11 @@ const cli = meow(
type: "string",
description: "Path to a markdown file to include as project doc",
},
flexMode: {
type: "boolean",
description:
"Enable the flex-mode service tier (only supported by models o3 and o4-mini)",
},
fullStdout: {
type: "boolean",
description:
@@ -250,12 +258,28 @@ config = {
apiKey,
...config,
model: model ?? config.model,
flexMode: Boolean(cli.flags.flexMode),
notify: Boolean(cli.flags.notify),
};
// Check for updates after loading config
// This is important because we write state file in the config dir
await checkForUpdates().catch();
// ---------------------------------------------------------------------------
// --flex-mode validation (only allowed for o3 and o4-mini)
// ---------------------------------------------------------------------------
if (cli.flags.flexMode) {
const allowedFlexModels = new Set(["o3", "o4-mini"]);
if (!allowedFlexModels.has(config.model)) {
// eslint-disable-next-line no-console
console.error(
`The --flex-mode option is only supported when using the 'o3' or 'o4-mini' models. ` +
`Current model: '${config.model}'.`,
);
process.exit(1);
}
}
if (!(await isModelSupportedForResponses(config.model))) {
// eslint-disable-next-line no-console

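With the validation in place, flex mode is opt-in per run, e.g.
codex --model o4-mini --flex-mode "explain this codebase to me"
(an illustrative invocation); any other model makes the CLI print the
error above and exit with code 1.
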
View File

@@ -59,6 +59,7 @@ const colorsByPolicy: Record<ApprovalPolicy, ColorName | undefined> = {
async function generateCommandExplanation(
command: Array<string>,
model: string,
flexMode: boolean,
): Promise<string> {
try {
// Create a temporary OpenAI client
@@ -73,6 +74,7 @@ async function generateCommandExplanation(
// Create a prompt that asks for an explanation with a more detailed system prompt
const response = await oai.chat.completions.create({
model,
...(flexMode ? { service_tier: "flex" } : {}),
messages: [
{
role: "system",
@@ -142,7 +144,11 @@ export default function TerminalChat({
const handleCompact = async () => {
setLoading(true);
try {
const summary = await generateCompactSummary(items, model);
const summary = await generateCompactSummary(
items,
model,
Boolean(config.flexMode),
);
setItems([
{
id: `compact-${Date.now()}`,
@@ -245,7 +251,11 @@ export default function TerminalChat({
log(`Generating explanation for command: ${commandForDisplay}`);
// Generate an explanation using the same model
const explanation = await generateCommandExplanation(command, model);
const explanation = await generateCommandExplanation(
command,
model,
Boolean(config.flexMode),
);
log(`Generated explanation: ${explanation}`);
// Ask for confirmation again, but with the explanation
@@ -453,6 +463,7 @@ export default function TerminalChat({
colorsByPolicy,
agent,
initialImagePaths,
flexModeEnabled: Boolean(config.flexMode),
}}
/>
) : (

View File

@@ -13,6 +13,7 @@ export interface TerminalHeaderProps {
colorsByPolicy: Record<string, string | undefined>;
agent?: AgentLoop;
initialImagePaths?: Array<string>;
flexModeEnabled?: boolean;
}
const TerminalHeader: React.FC<TerminalHeaderProps> = ({
@@ -24,6 +25,7 @@ const TerminalHeader: React.FC<TerminalHeaderProps> = ({
colorsByPolicy,
agent,
initialImagePaths,
flexModeEnabled = false,
}) => {
return (
<>
@@ -32,6 +34,7 @@ const TerminalHeader: React.FC<TerminalHeaderProps> = ({
<Text>
Codex v{version} – {PWD} – {model} –{" "}
<Text color={colorsByPolicy[approvalPolicy]}>{approvalPolicy}</Text>
{flexModeEnabled ? " – flex-mode" : ""}
</Text>
) : (
<>
@@ -68,6 +71,12 @@ const TerminalHeader: React.FC<TerminalHeaderProps> = ({
{approvalPolicy}
</Text>
</Text>
{flexModeEnabled && (
<Text dimColor>
<Text color="blueBright"></Text> flex-mode:{" "}
<Text bold>enabled</Text>
</Text>
)}
{initialImagePaths?.map((img, idx) => (
<Text key={img ?? idx} color="gray">
<Text color="blueBright"></Text> image:{" "}

View File

@@ -400,6 +400,7 @@ export function SinglePassApp({
});
const chatResp = await openai.beta.chat.completions.parse({
model: config.model,
...(config.flexMode ? { service_tier: "flex" } : {}),
messages: [
{
role: "user",

View File

@@ -516,6 +516,7 @@ export class AgentLoop {
stream: true,
parallel_tool_calls: false,
reasoning,
...(this.config.flexMode ? { service_tier: "flex" } : {}),
tools: [
{
type: "function",

View File

@@ -9,9 +9,17 @@ import OpenAI from "openai";
* @param model The model to use for generating the summary
* @returns A concise structured summary string
*/
/**
* Generate a condensed summary of the conversation items.
* @param items The list of conversation items to summarize
* @param model The model to use for generating the summary
* @param flexMode Whether to use the flex-mode service tier
* @returns A concise structured summary string
*/
export async function generateCompactSummary(
items: Array<ResponseItem>,
model: string,
flexMode = false,
): Promise<string> {
const oai = new OpenAI({
apiKey: process.env["OPENAI_API_KEY"],
@@ -44,6 +52,7 @@ export async function generateCompactSummary(
const response = await oai.chat.completions.create({
model,
...(flexMode ? { service_tier: "flex" } : {}),
messages: [
{
role: "assistant",

View File

@@ -79,6 +79,9 @@ export type AppConfig = {
memory?: MemoryConfig;
/** Whether to enable desktop notifications for responses */
notify: boolean;
/** Enable the "flex-mode" processing mode for supported models (o3, o4-mini) */
flexMode?: boolean;
history?: {
maxSize: number;
saveHistory: boolean;
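
The new flexMode field rides along on AppConfig so downstream call sites
(the agent loop, single-pass mode, compact summaries, and command
explanations) can read one flag without extra plumbing. A hypothetical
fragment, using only fields visible in this hunk:

    // Hypothetical: a partial config with flex mode enabled. Real
    // configs carry more fields than this hunk shows.
    const flags: Partial<AppConfig> = {
      notify: true,
      flexMode: true,
    };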