feat: add flex mode option for cost savings (#372)

Add an option to enable flex processing mode (the "flex" service tier), which
reduces costs when running the agent.

Bump the `openai` TypeScript package version to pick up support for the new feature.

---------

Co-authored-by: Thibault Sottiaux <tibo@openai.com>
This commit is contained in:
salama-openai
2025-04-18 22:15:01 -07:00
committed by GitHub
parent 6f2271e8cd
commit 1a8610cd9e
7 changed files with 60 additions and 2 deletions

View File

@@ -516,6 +516,7 @@ export class AgentLoop {
stream: true,
parallel_tool_calls: false,
reasoning,
...(this.config.flexMode ? { service_tier: "flex" } : {}),
tools: [
{
type: "function",

View File

@@ -9,9 +9,17 @@ import OpenAI from "openai";
* @param model The model to use for generating the summary
* @returns A concise structured summary string
*/
/**
* Generate a condensed summary of the conversation items.
* @param items The list of conversation items to summarize
* @param model The model to use for generating the summary
* @param flexMode When true, request the "flex" service tier (defaults to false)
* @returns A concise structured summary string
*/
export async function generateCompactSummary(
items: Array<ResponseItem>,
model: string,
flexMode = false,
): Promise<string> {
const oai = new OpenAI({
apiKey: process.env["OPENAI_API_KEY"],
@@ -44,6 +52,7 @@ export async function generateCompactSummary(
const response = await oai.chat.completions.create({
model,
...(flexMode ? { service_tier: "flex" } : {}),
messages: [
{
role: "assistant",

View File

@@ -79,6 +79,9 @@ export type AppConfig = {
memory?: MemoryConfig;
/** Whether to enable desktop notifications for responses */
notify: boolean;
/** Enable the "flex-mode" processing mode for supported models (o3, o4-mini) */
flexMode?: boolean;
history?: {
maxSize: number;
saveHistory: boolean;