feat: add flex mode option for cost savings (#372)

Adds an option to enable flex processing mode to reduce costs when
running the agent.

Bumps the openai TypeScript package to a version that supports the new feature.

---------

Co-authored-by: Thibault Sottiaux <tibo@openai.com>
Commit 1a8610cd9e (parent 6f2271e8cd)
Author: salama-openai
Date: 2025-04-18 22:15:01 -07:00
Committed by: GitHub
7 changed files with 60 additions and 2 deletions
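
The mechanism is small: the --flex-mode flag becomes a flexMode boolean on the
app config, and each OpenAI request conditionally spreads service_tier: "flex"
into its parameters. A minimal standalone sketch of the pattern, assuming a
recent openai package whose chat.completions.create accepts service_tier (the
AppConfig shape and the complete() helper are illustrative, not the actual
codebase):

import OpenAI from "openai";

type AppConfig = { model: string; flexMode?: boolean };

// Flex processing is only offered for these models (per this commit).
const ALLOWED_FLEX_MODELS = new Set(["o3", "o4-mini"]);

async function complete(config: AppConfig, prompt: string): Promise<string> {
  if (config.flexMode && !ALLOWED_FLEX_MODELS.has(config.model)) {
    throw new Error(
      `flex mode requires 'o3' or 'o4-mini', got '${config.model}'`,
    );
  }
  const oai = new OpenAI({ apiKey: process.env["OPENAI_API_KEY"] });
  const response = await oai.chat.completions.create({
    model: config.model,
    // The spread leaves the request untouched when flex mode is off.
    ...(config.flexMode ? { service_tier: "flex" } : {}),
    messages: [{ role: "user", content: prompt }],
  });
  return response.choices[0]?.message?.content ?? "";
}

The same conditional spread appears at every call site touched below, so
requests are unchanged whenever the flag is off.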

View File

@@ -71,6 +71,9 @@ const cli = meow(
     --full-stdout      Do not truncate stdout/stderr from command outputs
     --notify           Enable desktop notifications for responses
+    --flex-mode        Use "flex-mode" processing mode for the request (only supported
+                       with models o3 and o4-mini)
 
   Dangerous options
     --dangerously-auto-approve-everything
       Skip all confirmation prompts and execute commands without
@@ -140,6 +143,11 @@ const cli = meow(
       type: "string",
       description: "Path to a markdown file to include as project doc",
     },
+    flexMode: {
+      type: "boolean",
+      description:
+        "Enable the flex-mode service tier (only supported by models o3 and o4-mini)",
+    },
     fullStdout: {
       type: "boolean",
       description:
@@ -250,12 +258,28 @@ config = {
   apiKey,
   ...config,
   model: model ?? config.model,
+  flexMode: Boolean(cli.flags.flexMode),
   notify: Boolean(cli.flags.notify),
 };
 
 // Check for updates after loading config
 // This is important because we write state file in the config dir
 await checkForUpdates().catch();
 
+// ---------------------------------------------------------------------------
+// --flex-mode validation (only allowed for o3 and o4-mini)
+// ---------------------------------------------------------------------------
+
+if (cli.flags.flexMode) {
+  const allowedFlexModels = new Set(["o3", "o4-mini"]);
+  if (!allowedFlexModels.has(config.model)) {
+    // eslint-disable-next-line no-console
+    console.error(
+      `The --flex-mode option is only supported when using the 'o3' or 'o4-mini' models. ` +
+        `Current model: '${config.model}'.`,
+    );
+    process.exit(1);
+  }
+}
 if (!(await isModelSupportedForResponses(config.model))) {
   // eslint-disable-next-line no-console

View File

@@ -59,6 +59,7 @@ const colorsByPolicy: Record<ApprovalPolicy, ColorName | undefined> = {
 async function generateCommandExplanation(
   command: Array<string>,
   model: string,
+  flexMode: boolean,
 ): Promise<string> {
   try {
     // Create a temporary OpenAI client
@@ -73,6 +74,7 @@ async function generateCommandExplanation(
     // Create a prompt that asks for an explanation with a more detailed system prompt
     const response = await oai.chat.completions.create({
       model,
+      ...(flexMode ? { service_tier: "flex" } : {}),
       messages: [
         {
           role: "system",
@@ -142,7 +144,11 @@ export default function TerminalChat({
   const handleCompact = async () => {
     setLoading(true);
     try {
-      const summary = await generateCompactSummary(items, model);
+      const summary = await generateCompactSummary(
+        items,
+        model,
+        Boolean(config.flexMode),
+      );
       setItems([
         {
           id: `compact-${Date.now()}`,
@@ -245,7 +251,11 @@ export default function TerminalChat({
       log(`Generating explanation for command: ${commandForDisplay}`);
 
       // Generate an explanation using the same model
-      const explanation = await generateCommandExplanation(command, model);
+      const explanation = await generateCommandExplanation(
+        command,
+        model,
+        Boolean(config.flexMode),
+      );
       log(`Generated explanation: ${explanation}`);
 
       // Ask for confirmation again, but with the explanation
@@ -453,6 +463,7 @@ export default function TerminalChat({
             colorsByPolicy,
             agent,
             initialImagePaths,
+            flexModeEnabled: Boolean(config.flexMode),
           }}
         />
       ) : (

View File

@@ -13,6 +13,7 @@ export interface TerminalHeaderProps {
   colorsByPolicy: Record<string, string | undefined>;
   agent?: AgentLoop;
   initialImagePaths?: Array<string>;
+  flexModeEnabled?: boolean;
 }
 
 const TerminalHeader: React.FC<TerminalHeaderProps> = ({
@@ -24,6 +25,7 @@ const TerminalHeader: React.FC<TerminalHeaderProps> = ({
   colorsByPolicy,
   agent,
   initialImagePaths,
+  flexModeEnabled = false,
 }) => {
   return (
     <>
@@ -32,6 +34,7 @@ const TerminalHeader: React.FC<TerminalHeaderProps> = ({
         <Text>
           Codex v{version} {PWD} {model}{" "}
           <Text color={colorsByPolicy[approvalPolicy]}>{approvalPolicy}</Text>
+          {flexModeEnabled ? " flex-mode" : ""}
         </Text>
       ) : (
         <>
@@ -68,6 +71,12 @@ const TerminalHeader: React.FC<TerminalHeaderProps> = ({
               {approvalPolicy}
             </Text>
           </Text>
+          {flexModeEnabled && (
+            <Text dimColor>
+              <Text color="blueBright">↳</Text> flex-mode:{" "}
+              <Text bold>enabled</Text>
+            </Text>
+          )}
           {initialImagePaths?.map((img, idx) => (
             <Text key={img ?? idx} color="gray">
               <Text color="blueBright">↳</Text> image:{" "}

View File

@@ -400,6 +400,7 @@ export function SinglePassApp({
   });
   const chatResp = await openai.beta.chat.completions.parse({
     model: config.model,
+    ...(config.flexMode ? { service_tier: "flex" } : {}),
     messages: [
       {
         role: "user",

View File

@@ -516,6 +516,7 @@ export class AgentLoop {
         stream: true,
         parallel_tool_calls: false,
         reasoning,
+        ...(this.config.flexMode ? { service_tier: "flex" } : {}),
         tools: [
           {
             type: "function",

View File

@@ -9,9 +9,17 @@ import OpenAI from "openai";
  * @param model The model to use for generating the summary
  * @returns A concise structured summary string
  */
+/**
+ * Generate a condensed summary of the conversation items.
+ * @param items The list of conversation items to summarize
+ * @param model The model to use for generating the summary
+ * @param flexMode Whether to use the flex-mode service tier
+ * @returns A concise structured summary string
+ */
 export async function generateCompactSummary(
   items: Array<ResponseItem>,
   model: string,
+  flexMode = false,
 ): Promise<string> {
   const oai = new OpenAI({
     apiKey: process.env["OPENAI_API_KEY"],
@@ -44,6 +52,7 @@ export async function generateCompactSummary(
   const response = await oai.chat.completions.create({
     model,
+    ...(flexMode ? { service_tier: "flex" } : {}),
     messages: [
       {
         role: "assistant",

View File

@@ -79,6 +79,9 @@ export type AppConfig = {
   memory?: MemoryConfig;
   /** Whether to enable desktop notifications for responses */
   notify: boolean;
+  /** Enable the "flex-mode" processing mode for supported models (o3, o4-mini) */
+  flexMode?: boolean;
   history?: {
     maxSize: number;
     saveHistory: boolean;