feat: add flex mode option for cost savings (#372)
Adds an option to turn on the flex processing mode to reduce costs when running the agent. Bumps the openai TypeScript package version to pick up support for the new feature. --------- Co-authored-by: Thibault Sottiaux <tibo@openai.com>
This commit is contained in:
@@ -71,6 +71,9 @@ const cli = meow(
|
|||||||
--full-stdout Do not truncate stdout/stderr from command outputs
|
--full-stdout Do not truncate stdout/stderr from command outputs
|
||||||
--notify Enable desktop notifications for responses
|
--notify Enable desktop notifications for responses
|
||||||
|
|
||||||
|
--flex-mode Use "flex-mode" processing mode for the request (only supported
|
||||||
|
with models o3 and o4-mini)
|
||||||
|
|
||||||
Dangerous options
|
Dangerous options
|
||||||
--dangerously-auto-approve-everything
|
--dangerously-auto-approve-everything
|
||||||
Skip all confirmation prompts and execute commands without
|
Skip all confirmation prompts and execute commands without
|
||||||
@@ -140,6 +143,11 @@ const cli = meow(
|
|||||||
type: "string",
|
type: "string",
|
||||||
description: "Path to a markdown file to include as project doc",
|
description: "Path to a markdown file to include as project doc",
|
||||||
},
|
},
|
||||||
|
flexMode: {
|
||||||
|
type: "boolean",
|
||||||
|
description:
|
||||||
|
"Enable the flex-mode service tier (only supported by models o3 and o4-mini)",
|
||||||
|
},
|
||||||
fullStdout: {
|
fullStdout: {
|
||||||
type: "boolean",
|
type: "boolean",
|
||||||
description:
|
description:
|
||||||
@@ -250,12 +258,28 @@ config = {
|
|||||||
apiKey,
|
apiKey,
|
||||||
...config,
|
...config,
|
||||||
model: model ?? config.model,
|
model: model ?? config.model,
|
||||||
|
flexMode: Boolean(cli.flags.flexMode),
|
||||||
notify: Boolean(cli.flags.notify),
|
notify: Boolean(cli.flags.notify),
|
||||||
};
|
};
|
||||||
|
|
||||||
// Check for updates after loading config
|
// Check for updates after loading config
|
||||||
// This is important because we write state file in the config dir
|
// This is important because we write state file in the config dir
|
||||||
await checkForUpdates().catch();
|
await checkForUpdates().catch();
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// --flex-mode validation (only allowed for o3 and o4-mini)
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
if (cli.flags.flexMode) {
|
||||||
|
const allowedFlexModels = new Set(["o3", "o4-mini"]);
|
||||||
|
if (!allowedFlexModels.has(config.model)) {
|
||||||
|
// eslint-disable-next-line no-console
|
||||||
|
console.error(
|
||||||
|
`The --flex-mode option is only supported when using the 'o3' or 'o4-mini' models. ` +
|
||||||
|
`Current model: '${config.model}'.`,
|
||||||
|
);
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (!(await isModelSupportedForResponses(config.model))) {
|
if (!(await isModelSupportedForResponses(config.model))) {
|
||||||
// eslint-disable-next-line no-console
|
// eslint-disable-next-line no-console
|
||||||
|
|||||||
@@ -59,6 +59,7 @@ const colorsByPolicy: Record<ApprovalPolicy, ColorName | undefined> = {
|
|||||||
async function generateCommandExplanation(
|
async function generateCommandExplanation(
|
||||||
command: Array<string>,
|
command: Array<string>,
|
||||||
model: string,
|
model: string,
|
||||||
|
flexMode: boolean,
|
||||||
): Promise<string> {
|
): Promise<string> {
|
||||||
try {
|
try {
|
||||||
// Create a temporary OpenAI client
|
// Create a temporary OpenAI client
|
||||||
@@ -73,6 +74,7 @@ async function generateCommandExplanation(
|
|||||||
// Create a prompt that asks for an explanation with a more detailed system prompt
|
// Create a prompt that asks for an explanation with a more detailed system prompt
|
||||||
const response = await oai.chat.completions.create({
|
const response = await oai.chat.completions.create({
|
||||||
model,
|
model,
|
||||||
|
...(flexMode ? { service_tier: "flex" } : {}),
|
||||||
messages: [
|
messages: [
|
||||||
{
|
{
|
||||||
role: "system",
|
role: "system",
|
||||||
@@ -142,7 +144,11 @@ export default function TerminalChat({
|
|||||||
const handleCompact = async () => {
|
const handleCompact = async () => {
|
||||||
setLoading(true);
|
setLoading(true);
|
||||||
try {
|
try {
|
||||||
const summary = await generateCompactSummary(items, model);
|
const summary = await generateCompactSummary(
|
||||||
|
items,
|
||||||
|
model,
|
||||||
|
Boolean(config.flexMode),
|
||||||
|
);
|
||||||
setItems([
|
setItems([
|
||||||
{
|
{
|
||||||
id: `compact-${Date.now()}`,
|
id: `compact-${Date.now()}`,
|
||||||
@@ -245,7 +251,11 @@ export default function TerminalChat({
|
|||||||
log(`Generating explanation for command: ${commandForDisplay}`);
|
log(`Generating explanation for command: ${commandForDisplay}`);
|
||||||
|
|
||||||
// Generate an explanation using the same model
|
// Generate an explanation using the same model
|
||||||
const explanation = await generateCommandExplanation(command, model);
|
const explanation = await generateCommandExplanation(
|
||||||
|
command,
|
||||||
|
model,
|
||||||
|
Boolean(config.flexMode),
|
||||||
|
);
|
||||||
log(`Generated explanation: ${explanation}`);
|
log(`Generated explanation: ${explanation}`);
|
||||||
|
|
||||||
// Ask for confirmation again, but with the explanation
|
// Ask for confirmation again, but with the explanation
|
||||||
@@ -453,6 +463,7 @@ export default function TerminalChat({
|
|||||||
colorsByPolicy,
|
colorsByPolicy,
|
||||||
agent,
|
agent,
|
||||||
initialImagePaths,
|
initialImagePaths,
|
||||||
|
flexModeEnabled: Boolean(config.flexMode),
|
||||||
}}
|
}}
|
||||||
/>
|
/>
|
||||||
) : (
|
) : (
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ export interface TerminalHeaderProps {
|
|||||||
colorsByPolicy: Record<string, string | undefined>;
|
colorsByPolicy: Record<string, string | undefined>;
|
||||||
agent?: AgentLoop;
|
agent?: AgentLoop;
|
||||||
initialImagePaths?: Array<string>;
|
initialImagePaths?: Array<string>;
|
||||||
|
flexModeEnabled?: boolean;
|
||||||
}
|
}
|
||||||
|
|
||||||
const TerminalHeader: React.FC<TerminalHeaderProps> = ({
|
const TerminalHeader: React.FC<TerminalHeaderProps> = ({
|
||||||
@@ -24,6 +25,7 @@ const TerminalHeader: React.FC<TerminalHeaderProps> = ({
|
|||||||
colorsByPolicy,
|
colorsByPolicy,
|
||||||
agent,
|
agent,
|
||||||
initialImagePaths,
|
initialImagePaths,
|
||||||
|
flexModeEnabled = false,
|
||||||
}) => {
|
}) => {
|
||||||
return (
|
return (
|
||||||
<>
|
<>
|
||||||
@@ -32,6 +34,7 @@ const TerminalHeader: React.FC<TerminalHeaderProps> = ({
|
|||||||
<Text>
|
<Text>
|
||||||
● Codex v{version} – {PWD} – {model} –{" "}
|
● Codex v{version} – {PWD} – {model} –{" "}
|
||||||
<Text color={colorsByPolicy[approvalPolicy]}>{approvalPolicy}</Text>
|
<Text color={colorsByPolicy[approvalPolicy]}>{approvalPolicy}</Text>
|
||||||
|
{flexModeEnabled ? " – flex-mode" : ""}
|
||||||
</Text>
|
</Text>
|
||||||
) : (
|
) : (
|
||||||
<>
|
<>
|
||||||
@@ -68,6 +71,12 @@ const TerminalHeader: React.FC<TerminalHeaderProps> = ({
|
|||||||
{approvalPolicy}
|
{approvalPolicy}
|
||||||
</Text>
|
</Text>
|
||||||
</Text>
|
</Text>
|
||||||
|
{flexModeEnabled && (
|
||||||
|
<Text dimColor>
|
||||||
|
<Text color="blueBright">↳</Text> flex-mode:{" "}
|
||||||
|
<Text bold>enabled</Text>
|
||||||
|
</Text>
|
||||||
|
)}
|
||||||
{initialImagePaths?.map((img, idx) => (
|
{initialImagePaths?.map((img, idx) => (
|
||||||
<Text key={img ?? idx} color="gray">
|
<Text key={img ?? idx} color="gray">
|
||||||
<Text color="blueBright">↳</Text> image:{" "}
|
<Text color="blueBright">↳</Text> image:{" "}
|
||||||
|
|||||||
@@ -400,6 +400,7 @@ export function SinglePassApp({
|
|||||||
});
|
});
|
||||||
const chatResp = await openai.beta.chat.completions.parse({
|
const chatResp = await openai.beta.chat.completions.parse({
|
||||||
model: config.model,
|
model: config.model,
|
||||||
|
...(config.flexMode ? { service_tier: "flex" } : {}),
|
||||||
messages: [
|
messages: [
|
||||||
{
|
{
|
||||||
role: "user",
|
role: "user",
|
||||||
|
|||||||
@@ -516,6 +516,7 @@ export class AgentLoop {
|
|||||||
stream: true,
|
stream: true,
|
||||||
parallel_tool_calls: false,
|
parallel_tool_calls: false,
|
||||||
reasoning,
|
reasoning,
|
||||||
|
...(this.config.flexMode ? { service_tier: "flex" } : {}),
|
||||||
tools: [
|
tools: [
|
||||||
{
|
{
|
||||||
type: "function",
|
type: "function",
|
||||||
|
|||||||
@@ -9,9 +9,17 @@ import OpenAI from "openai";
|
|||||||
* @param model The model to use for generating the summary
|
* @param model The model to use for generating the summary
|
||||||
* @returns A concise structured summary string
|
* @returns A concise structured summary string
|
||||||
*/
|
*/
|
||||||
|
/**
|
||||||
|
* Generate a condensed summary of the conversation items.
|
||||||
|
* @param items The list of conversation items to summarize
|
||||||
|
* @param model The model to use for generating the summary
|
||||||
|
* @param flexMode Whether to use the flex-mode service tier
|
||||||
|
* @returns A concise structured summary string
|
||||||
|
*/
|
||||||
export async function generateCompactSummary(
|
export async function generateCompactSummary(
|
||||||
items: Array<ResponseItem>,
|
items: Array<ResponseItem>,
|
||||||
model: string,
|
model: string,
|
||||||
|
flexMode = false,
|
||||||
): Promise<string> {
|
): Promise<string> {
|
||||||
const oai = new OpenAI({
|
const oai = new OpenAI({
|
||||||
apiKey: process.env["OPENAI_API_KEY"],
|
apiKey: process.env["OPENAI_API_KEY"],
|
||||||
@@ -44,6 +52,7 @@ export async function generateCompactSummary(
|
|||||||
|
|
||||||
const response = await oai.chat.completions.create({
|
const response = await oai.chat.completions.create({
|
||||||
model,
|
model,
|
||||||
|
...(flexMode ? { service_tier: "flex" } : {}),
|
||||||
messages: [
|
messages: [
|
||||||
{
|
{
|
||||||
role: "assistant",
|
role: "assistant",
|
||||||
|
|||||||
@@ -79,6 +79,9 @@ export type AppConfig = {
|
|||||||
memory?: MemoryConfig;
|
memory?: MemoryConfig;
|
||||||
/** Whether to enable desktop notifications for responses */
|
/** Whether to enable desktop notifications for responses */
|
||||||
notify: boolean;
|
notify: boolean;
|
||||||
|
|
||||||
|
/** Enable the "flex-mode" processing mode for supported models (o3, o4-mini) */
|
||||||
|
flexMode?: boolean;
|
||||||
history?: {
|
history?: {
|
||||||
maxSize: number;
|
maxSize: number;
|
||||||
saveHistory: boolean;
|
saveHistory: boolean;
|
||||||
|
|||||||
Reference in New Issue
Block a user