feat: add flex mode option for cost savings (#372)
Adds an option to enable flex processing mode, reducing costs when running the agent. Bumps the openai TypeScript package to a version that supports the new `service_tier` feature.

---------

Co-authored-by: Thibault Sottiaux <tibo@openai.com>
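At the API level the feature reduces to one request parameter: when flex mode is on, each OpenAI call carries `service_tier: "flex"`. A minimal sketch of the pattern the diff repeats at every call site, assuming `OPENAI_API_KEY` is set; the `ask` helper is illustrative, not code from this commit:

    import OpenAI from "openai";

    const oai = new OpenAI({ apiKey: process.env["OPENAI_API_KEY"] });

    async function ask(model: string, flexMode: boolean): Promise<string> {
      const response = await oai.chat.completions.create({
        model,
        // Conditional spread: service_tier is added only when flex mode is
        // enabled, so requests are unchanged for models that don't support it.
        ...(flexMode ? { service_tier: "flex" } : {}),
        messages: [{ role: "user", content: "Summarize this repository." }],
      });
      return response.choices[0]?.message?.content ?? "";
    }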
@@ -71,6 +71,9 @@ const cli = meow(
     --full-stdout              Do not truncate stdout/stderr from command outputs
     --notify                   Enable desktop notifications for responses
 
+    --flex-mode                Use "flex-mode" processing mode for the request (only supported
+                               with models o3 and o4-mini)
+
   Dangerous options
     --dangerously-auto-approve-everything
                                Skip all confirmation prompts and execute commands without
@@ -140,6 +143,11 @@ const cli = meow(
       type: "string",
       description: "Path to a markdown file to include as project doc",
     },
+    flexMode: {
+      type: "boolean",
+      description:
+        "Enable the flex-mode service tier (only supported by models o3 and o4-mini)",
+    },
     fullStdout: {
       type: "boolean",
       description:
@@ -250,12 +258,28 @@ config = {
   apiKey,
   ...config,
   model: model ?? config.model,
+  flexMode: Boolean(cli.flags.flexMode),
   notify: Boolean(cli.flags.notify),
 };
 
 // Check for updates after loading config
 // This is important because we write state file in the config dir
 await checkForUpdates().catch();
+// ---------------------------------------------------------------------------
+// --flex-mode validation (only allowed for o3 and o4-mini)
+// ---------------------------------------------------------------------------
+
+if (cli.flags.flexMode) {
+  const allowedFlexModels = new Set(["o3", "o4-mini"]);
+  if (!allowedFlexModels.has(config.model)) {
+    // eslint-disable-next-line no-console
+    console.error(
+      `The --flex-mode option is only supported when using the 'o3' or 'o4-mini' models. ` +
+        `Current model: '${config.model}'.`,
+    );
+    process.exit(1);
+  }
+}
 
 if (!(await isModelSupportedForResponses(config.model))) {
   // eslint-disable-next-line no-console
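The guard runs before any request is made, so an unsupported combination fails fast: an invocation along the lines of `codex --model o4-mini --flex-mode` (command shown for illustration; only `--flex-mode` is introduced by this diff) proceeds, while any other model prints the error above and exits with status 1.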
@@ -59,6 +59,7 @@ const colorsByPolicy: Record<ApprovalPolicy, ColorName | undefined> = {
 async function generateCommandExplanation(
   command: Array<string>,
   model: string,
+  flexMode: boolean,
 ): Promise<string> {
   try {
     // Create a temporary OpenAI client
@@ -73,6 +74,7 @@ async function generateCommandExplanation(
   // Create a prompt that asks for an explanation with a more detailed system prompt
   const response = await oai.chat.completions.create({
     model,
+    ...(flexMode ? { service_tier: "flex" } : {}),
     messages: [
       {
         role: "system",
@@ -142,7 +144,11 @@ export default function TerminalChat({
   const handleCompact = async () => {
     setLoading(true);
     try {
-      const summary = await generateCompactSummary(items, model);
+      const summary = await generateCompactSummary(
+        items,
+        model,
+        Boolean(config.flexMode),
+      );
       setItems([
         {
           id: `compact-${Date.now()}`,
@@ -245,7 +251,11 @@ export default function TerminalChat({
       log(`Generating explanation for command: ${commandForDisplay}`);
 
       // Generate an explanation using the same model
-      const explanation = await generateCommandExplanation(command, model);
+      const explanation = await generateCommandExplanation(
+        command,
+        model,
+        Boolean(config.flexMode),
+      );
       log(`Generated explanation: ${explanation}`);
 
       // Ask for confirmation again, but with the explanation
@@ -453,6 +463,7 @@ export default function TerminalChat({
         colorsByPolicy,
         agent,
         initialImagePaths,
+        flexModeEnabled: Boolean(config.flexMode),
       }}
     />
   ) : (
@@ -13,6 +13,7 @@ export interface TerminalHeaderProps {
   colorsByPolicy: Record<string, string | undefined>;
   agent?: AgentLoop;
   initialImagePaths?: Array<string>;
+  flexModeEnabled?: boolean;
 }
 
 const TerminalHeader: React.FC<TerminalHeaderProps> = ({
@@ -24,6 +25,7 @@ const TerminalHeader: React.FC<TerminalHeaderProps> = ({
   colorsByPolicy,
   agent,
   initialImagePaths,
+  flexModeEnabled = false,
 }) => {
   return (
     <>
@@ -32,6 +34,7 @@ const TerminalHeader: React.FC<TerminalHeaderProps> = ({
         <Text>
           ● Codex v{version} – {PWD} – {model} –{" "}
           <Text color={colorsByPolicy[approvalPolicy]}>{approvalPolicy}</Text>
+          {flexModeEnabled ? " – flex-mode" : ""}
         </Text>
       ) : (
         <>
@@ -68,6 +71,12 @@ const TerminalHeader: React.FC<TerminalHeaderProps> = ({
             {approvalPolicy}
           </Text>
         </Text>
+        {flexModeEnabled && (
+          <Text dimColor>
+            <Text color="blueBright">↳</Text> flex-mode:{" "}
+            <Text bold>enabled</Text>
+          </Text>
+        )}
         {initialImagePaths?.map((img, idx) => (
           <Text key={img ?? idx} color="gray">
             <Text color="blueBright">↳</Text> image:{" "}
@@ -400,6 +400,7 @@ export function SinglePassApp({
   });
   const chatResp = await openai.beta.chat.completions.parse({
     model: config.model,
+    ...(config.flexMode ? { service_tier: "flex" } : {}),
     messages: [
       {
         role: "user",
@@ -516,6 +516,7 @@ export class AgentLoop {
       stream: true,
       parallel_tool_calls: false,
       reasoning,
+      ...(this.config.flexMode ? { service_tier: "flex" } : {}),
       tools: [
        {
          type: "function",
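The same conditional spread appears at every call site (the explanation and summary helpers, the single-pass parse call, and the agent's streaming request above), so with the flag off the request bodies are byte-for-byte identical to before. A sketch of the idiom in isolation; the `flexParams` helper is hypothetical and not part of the diff:

    type FlexParams = { service_tier?: "flex" };

    // Centralizes the spread that the diff writes inline at each call site.
    function flexParams(enabled: boolean): FlexParams {
      return enabled ? { service_tier: "flex" } : {};
    }

    // Spreading {} adds no keys, so `model` and `stream` are the only
    // fields in the request when flex mode is disabled.
    const request = { model: "o4-mini", stream: true, ...flexParams(true) };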
@@ -9,9 +9,17 @@ import OpenAI from "openai";
  * @param model The model to use for generating the summary
  * @returns A concise structured summary string
  */
+/**
+ * Generate a condensed summary of the conversation items.
+ * @param items The list of conversation items to summarize
+ * @param model The model to use for generating the summary
+ * @param flexMode Whether to use the flex-mode service tier
+ * @returns A concise structured summary string
+ */
 export async function generateCompactSummary(
   items: Array<ResponseItem>,
   model: string,
+  flexMode = false,
 ): Promise<string> {
   const oai = new OpenAI({
     apiKey: process.env["OPENAI_API_KEY"],
@@ -44,6 +52,7 @@ export async function generateCompactSummary(
 
   const response = await oai.chat.completions.create({
     model,
+    ...(flexMode ? { service_tier: "flex" } : {}),
     messages: [
       {
         role: "assistant",
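Because `flexMode` defaults to `false`, existing callers of `generateCompactSummary` compile unchanged; only the updated TerminalChat call opts in. An illustrative pair of calls (the `items` array is assumed to be in scope):

    // Equivalent to the old two-argument call: no service_tier is sent.
    const summary = await generateCompactSummary(items, "o4-mini");

    // Opts in to the flex service tier for the summary request.
    const flexSummary = await generateCompactSummary(items, "o4-mini", true);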
@@ -79,6 +79,9 @@ export type AppConfig = {
   memory?: MemoryConfig;
   /** Whether to enable desktop notifications for responses */
   notify: boolean;
+
+  /** Enable the "flex-mode" processing mode for supported models (o3, o4-mini) */
+  flexMode?: boolean;
   history?: {
     maxSize: number;
     saveHistory: boolean;
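Note that while `flexMode` is optional on `AppConfig`, the merge shown earlier assigns `flexMode: Boolean(cli.flags.flexMode)` after spreading `...config`, so the CLI flag determines the effective value on each run. A sketch of a config object carrying the new field; the field list is abridged and `AppConfig` may require more than is shown here:

    const config: AppConfig = {
      model: "o3",      // flex-mode is only accepted for o3 and o4-mini
      notify: false,
      flexMode: true,   // forwarded to the API as service_tier: "flex"
    };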