feat: add flex mode option for cost savings (#372)

Add an option to enable flex processing mode to reduce costs when
running the agent.

Bumped the openai TypeScript package to a version that supports the new feature.

---------

Co-authored-by: Thibault Sottiaux <tibo@openai.com>
Author: salama-openai
Date: 2025-04-18 22:15:01 -07:00
Committed by: GitHub
Parent: 6f2271e8cd
Commit: 1a8610cd9e
7 changed files with 60 additions and 2 deletions
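
At the API level the change is small: when the flag is set, each OpenAI
request gains a service_tier: "flex" parameter, which the hunks below
thread through every call site. A minimal standalone sketch of the same
pattern (the client setup and prompt are illustrative, not part of this
commit):

    import OpenAI from "openai";

    // Sketch: opt a single chat completion into the flex service tier.
    // Flex processing trades latency for lower cost and is only accepted
    // by supported models (o3 and o4-mini at the time of this commit).
    const oai = new OpenAI({ apiKey: process.env["OPENAI_API_KEY"] });

    const flexMode = true; // e.g. derived from the --flex-mode CLI flag
    const response = await oai.chat.completions.create({
      model: "o4-mini",
      ...(flexMode ? { service_tier: "flex" } : {}),
      messages: [{ role: "user", content: "Summarize this repository." }],
    });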

View File

@@ -71,6 +71,9 @@ const cli = meow(
--full-stdout Do not truncate stdout/stderr from command outputs
--notify Enable desktop notifications for responses
--flex-mode Use "flex-mode" processing mode for the request (only supported
with models o3 and o4-mini)
Dangerous options
--dangerously-auto-approve-everything
Skip all confirmation prompts and execute commands without
@@ -140,6 +143,11 @@ const cli = meow(
type: "string",
description: "Path to a markdown file to include as project doc",
},
flexMode: {
type: "boolean",
description:
"Enable the flex-mode service tier (only supported by models o3 and o4-mini)",
},
fullStdout: {
type: "boolean",
description:
@@ -250,12 +258,28 @@ config = {
apiKey,
...config,
model: model ?? config.model,
flexMode: Boolean(cli.flags.flexMode),
notify: Boolean(cli.flags.notify),
};
// Check for updates after loading config
// This is important because we write state file in the config dir
await checkForUpdates().catch();
// ---------------------------------------------------------------------------
// --flex-mode validation (only allowed for o3 and o4-mini)
// ---------------------------------------------------------------------------
if (cli.flags.flexMode) {
const allowedFlexModels = new Set(["o3", "o4-mini"]);
if (!allowedFlexModels.has(config.model)) {
// eslint-disable-next-line no-console
console.error(
`The --flex-mode option is only supported when using the 'o3' or 'o4-mini' models. ` +
`Current model: '${config.model}'.`,
);
process.exit(1);
}
}
if (!(await isModelSupportedForResponses(config.model))) {
// eslint-disable-next-line no-console

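With the validation in place, flex mode is opt-in per run, e.g.
codex --model o4-mini --flex-mode "explain this codebase to me"
(an illustrative invocation); any other model makes the CLI print the
error above and exit with code 1.
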
View File

@@ -59,6 +59,7 @@ const colorsByPolicy: Record<ApprovalPolicy, ColorName | undefined> = {
async function generateCommandExplanation(
command: Array<string>,
model: string,
flexMode: boolean,
): Promise<string> {
try {
// Create a temporary OpenAI client
@@ -73,6 +74,7 @@ async function generateCommandExplanation(
// Create a prompt that asks for an explanation with a more detailed system prompt
const response = await oai.chat.completions.create({
model,
...(flexMode ? { service_tier: "flex" } : {}),
messages: [
{
role: "system",
@@ -142,7 +144,11 @@ export default function TerminalChat({
const handleCompact = async () => {
setLoading(true);
try {
const summary = await generateCompactSummary(items, model);
const summary = await generateCompactSummary(
items,
model,
Boolean(config.flexMode),
);
setItems([
{
id: `compact-${Date.now()}`,
@@ -245,7 +251,11 @@ export default function TerminalChat({
log(`Generating explanation for command: ${commandForDisplay}`);
// Generate an explanation using the same model
const explanation = await generateCommandExplanation(command, model);
const explanation = await generateCommandExplanation(
command,
model,
Boolean(config.flexMode),
);
log(`Generated explanation: ${explanation}`);
// Ask for confirmation again, but with the explanation
@@ -453,6 +463,7 @@ export default function TerminalChat({
colorsByPolicy,
agent,
initialImagePaths,
flexModeEnabled: Boolean(config.flexMode),
}}
/>
) : (

View File

@@ -13,6 +13,7 @@ export interface TerminalHeaderProps {
colorsByPolicy: Record<string, string | undefined>;
agent?: AgentLoop;
initialImagePaths?: Array<string>;
flexModeEnabled?: boolean;
}
const TerminalHeader: React.FC<TerminalHeaderProps> = ({
@@ -24,6 +25,7 @@ const TerminalHeader: React.FC<TerminalHeaderProps> = ({
colorsByPolicy,
agent,
initialImagePaths,
flexModeEnabled = false,
}) => {
return (
<>
@@ -32,6 +34,7 @@ const TerminalHeader: React.FC<TerminalHeaderProps> = ({
<Text>
Codex v{version} – {PWD} – {model} –{" "}
<Text color={colorsByPolicy[approvalPolicy]}>{approvalPolicy}</Text>
{flexModeEnabled ? " – flex-mode" : ""}
</Text>
) : (
<>
@@ -68,6 +71,12 @@ const TerminalHeader: React.FC<TerminalHeaderProps> = ({
{approvalPolicy}
</Text>
</Text>
{flexModeEnabled && (
<Text dimColor>
<Text color="blueBright"></Text> flex-mode:{" "}
<Text bold>enabled</Text>
</Text>
)}
{initialImagePaths?.map((img, idx) => (
<Text key={img ?? idx} color="gray">
<Text color="blueBright"></Text> image:{" "}

View File

@@ -400,6 +400,7 @@ export function SinglePassApp({
});
const chatResp = await openai.beta.chat.completions.parse({
model: config.model,
...(config.flexMode ? { service_tier: "flex" } : {}),
messages: [
{
role: "user",

View File

@@ -516,6 +516,7 @@ export class AgentLoop {
stream: true,
parallel_tool_calls: false,
reasoning,
...(this.config.flexMode ? { service_tier: "flex" } : {}),
tools: [
{
type: "function",

View File

@@ -9,9 +9,17 @@ import OpenAI from "openai";
* @param model The model to use for generating the summary
* @returns A concise structured summary string
*/
/**
* Generate a condensed summary of the conversation items.
* @param items The list of conversation items to summarize
* @param model The model to use for generating the summary
* @param flexMode Whether to use the flex-mode service tier
* @returns A concise structured summary string
*/
export async function generateCompactSummary(
items: Array<ResponseItem>,
model: string,
flexMode = false,
): Promise<string> {
const oai = new OpenAI({
apiKey: process.env["OPENAI_API_KEY"],
@@ -44,6 +52,7 @@ export async function generateCompactSummary(
const response = await oai.chat.completions.create({
model,
...(flexMode ? { service_tier: "flex" } : {}),
messages: [
{
role: "assistant",

View File

@@ -79,6 +79,9 @@ export type AppConfig = {
memory?: MemoryConfig;
/** Whether to enable desktop notifications for responses */
notify: boolean;
/** Enable the "flex-mode" processing mode for supported models (o3, o4-mini) */
flexMode?: boolean;
history?: {
maxSize: number;
saveHistory: boolean;
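
The new flexMode field rides along on AppConfig so downstream call sites
(the agent loop, single-pass mode, compact summaries, and command
explanations) can read one flag without extra plumbing. A hypothetical
fragment, using only fields visible in this hunk:

    // Hypothetical: a partial config with flex mode enabled. Real
    // configs carry more fields than this hunk shows.
    const flags: Partial<AppConfig> = {
      notify: true,
      flexMode: true,
    };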