52 lines
1.5 KiB
TypeScript
52 lines
1.5 KiB
TypeScript
|
|
import type { ResponseItem } from "openai/resources/responses/responses.mjs";
|
|||
|
|
|
|||
|
|
/**
|
|||
|
|
* Roughly estimate the number of language‑model tokens represented by a list
|
|||
|
|
* of OpenAI `ResponseItem`s.
|
|||
|
|
*
|
|||
|
|
* A full tokenizer would be more accurate, but would add a heavyweight
|
|||
|
|
* dependency for only marginal benefit. Empirically, assuming ~4 characters
|
|||
|
|
* per token offers a good enough signal for displaying context‑window usage
|
|||
|
|
* to the user.
|
|||
|
|
*
|
|||
|
|
* The algorithm counts characters from the different content types we may
|
|||
|
|
* encounter and then converts that char count to tokens by dividing by four
|
|||
|
|
* and rounding up.
|
|||
|
|
*/
|
|||
|
|
export function approximateTokensUsed(items: Array<ResponseItem>): number {
|
|||
|
|
let charCount = 0;
|
|||
|
|
|
|||
|
|
for (const item of items) {
|
|||
|
|
switch (item.type) {
|
|||
|
|
case "message": {
|
|||
|
|
for (const c of item.content) {
|
|||
|
|
if (c.type === "input_text" || c.type === "output_text") {
|
|||
|
|
charCount += c.text.length;
|
|||
|
|
} else if (c.type === "refusal") {
|
|||
|
|
charCount += c.refusal.length;
|
|||
|
|
} else if (c.type === "input_file") {
|
|||
|
|
charCount += c.filename?.length ?? 0;
|
|||
|
|
}
|
|||
|
|
// images and other content types are ignored (0 chars)
|
|||
|
|
}
|
|||
|
|
break;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
case "function_call": {
|
|||
|
|
charCount += (item.name?.length || 0) + (item.arguments?.length || 0);
|
|||
|
|
break;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
case "function_call_output": {
|
|||
|
|
charCount += item.output.length;
|
|||
|
|
break;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
default:
|
|||
|
|
break;
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
return Math.ceil(charCount / 4);
|
|||
|
|
}
|