feat: add flex mode option for cost savings (#372)

Adds an option to enable flex processing mode (the "flex" service tier), which reduces costs when running the agent. Bumps the `openai` TypeScript package version to one that supports the new feature. --------- Co-authored-by: Thibault Sottiaux <tibo@openai.com>
2025-04-18 22:15:01 -07:00
parent 6f2271e8cd
commit 1a8610cd9e
7 changed files with 60 additions and 2 deletions
--- a/codex-cli/src/utils/agent/agent-loop.ts
+++ b/codex-cli/src/utils/agent/agent-loop.ts
@@ -516,6 +516,7 @@ export class AgentLoop {
              stream: true,
              parallel_tool_calls: false,
              reasoning,
+              ...(this.config.flexMode ? { service_tier: "flex" } : {}),
              tools: [
                {
                  type: "function",
--- a/codex-cli/src/utils/compact-summary.ts
+++ b/codex-cli/src/utils/compact-summary.ts
@@ -9,9 +9,17 @@ import OpenAI from "openai";
 * @param model The model to use for generating the summary
 * @returns A concise structured summary string
 */
+/**
+ * Generate a condensed summary of the conversation items.
+ * @param items The list of conversation items to summarize
+ * @param model The model to use for generating the summary
+ * @param flexMode Whether to request the "flex" service tier (defaults to false)
+ * @returns A concise structured summary string
+ */
 export async function generateCompactSummary(
  items: Array<ResponseItem>,
  model: string,
+  flexMode = false,
 ): Promise<string> {
  const oai = new OpenAI({
    apiKey: process.env["OPENAI_API_KEY"],
@@ -44,6 +52,7 @@ export async function generateCompactSummary(

  const response = await oai.chat.completions.create({
    model,
+    ...(flexMode ? { service_tier: "flex" } : {}),
    messages: [
      {
        role: "assistant",
--- a/codex-cli/src/utils/config.ts
+++ b/codex-cli/src/utils/config.ts
@@ -79,6 +79,9 @@ export type AppConfig = {
  memory?: MemoryConfig;
  /** Whether to enable desktop notifications for responses */
  notify: boolean;
+
+  /** Enable flex processing (the "flex" service tier) for supported models (o3, o4-mini) */
+  flexMode?: boolean;
  history?: {
    maxSize: number;
    saveHistory: boolean;