// llmx/codex-cli/src/utils/model-utils.ts
import type { ResponseItem } from "openai/resources/responses/responses.mjs";
import { approximateTokensUsed } from "./approximate-tokens-used.js";
import { getBaseUrl, getApiKey } from "./config";
import OpenAI from "openai";

const MODEL_LIST_TIMEOUT_MS = 2_000; // 2 seconds

export const RECOMMENDED_MODELS: Array<string> = ["o4-mini", "o3"];

/**
* Background model loader / cache.
*
* We start fetching the list of available models from OpenAI once the CLI
* enters interactive mode. The request is made exactly once during the
* lifetime of the process and the results are cached for subsequent calls.
*/
async function fetchModels(provider: string): Promise<Array<string>> {
  // If the user has not configured an API key we cannot retrieve the models.
  if (!getApiKey(provider)) {
    throw new Error("No API key configured for provider: " + provider);
  }

  try {
    const openai = new OpenAI({
      apiKey: getApiKey(provider),
      baseURL: getBaseUrl(provider),
    });
    const list = await openai.models.list();
    const models: Array<string> = [];
    for await (const model of list as AsyncIterable<{ id?: string }>) {
      if (model && typeof model.id === "string") {
        let modelStr = model.id;
        // Gemini returns ids prefixed with "models/"; strip the prefix so the
        // names are comparable across providers.
        if (modelStr.startsWith("models/")) {
          modelStr = modelStr.replace("models/", "");
        }
        models.push(modelStr);
      }
    }
    return models.sort();
  } catch {
    // Network / auth errors fall back to an empty list so the CLI keeps
    // working offline.
    return [];
  }
}

/** Returns the list of models available for the provided key / credentials. */
export async function getAvailableModels(
  provider: string,
): Promise<Array<string>> {
  return fetchModels(provider.toLowerCase());
}
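
// The loader comment above describes fetching the model list once per
// process and caching it, but the cache itself is not shown in this file.
// A minimal sketch of one way that memoisation could look (the `modelsCache`
// map and the `getAvailableModelsCached` name are hypothetical, for
// illustration only):
const modelsCache = new Map<string, Promise<Array<string>>>();

export function getAvailableModelsCached(
  provider: string,
): Promise<Array<string>> {
  const key = provider.toLowerCase();
  let cached = modelsCache.get(key);
  if (!cached) {
    // Cache the promise itself so concurrent callers share one request.
    cached = fetchModels(key);
    modelsCache.set(key, cached);
  }
  return cached;
}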

/**
 * Best-effort check that the provided model identifier appears in the set
 * returned by {@link getAvailableModels}. Errs on the side of returning
 * `true`: recommended models, an empty or timed-out model list, and network
 * failures are all treated as "supported" so startup is never blocked.
 */
export async function isModelSupportedForResponses(
  provider: string,
  model: string | undefined | null,
): Promise<boolean> {
  if (
    typeof model !== "string" ||
    model.trim() === "" ||
    RECOMMENDED_MODELS.includes(model)
  ) {
    return true;
  }

  try {
    const models = await Promise.race<Array<string>>([
      getAvailableModels(provider),
      new Promise<Array<string>>((resolve) =>
        setTimeout(() => resolve([]), MODEL_LIST_TIMEOUT_MS),
      ),
    ]);

    // If the timeout fired we get an empty list → treat as supported to avoid
    // false negatives.
    if (models.length === 0) {
      return true;
    }

    return models.includes(model.trim());
  } catch {
    // Network or library failure → don't block startup.
    return true;
  }
}
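
// The timeout guard above is a general pattern: race the real work against a
// timer that resolves to a harmless fallback. A reusable sketch of the same
// idea (the `withTimeout` helper is hypothetical, not part of this module):
function withTimeout<T>(
  work: Promise<T>,
  ms: number,
  fallback: T,
): Promise<T> {
  return Promise.race<T>([
    work,
    new Promise<T>((resolve) => setTimeout(() => resolve(fallback), ms)),
  ]);
}

// Usage mirroring the check above:
//   const models = await withTimeout(getAvailableModels("openai"), 2_000, []);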

/** Returns the maximum context length (in tokens) for a given model. */
function maxTokensForModel(model: string): number {
  // TODO: These numbers are best-effort guesses and provide a basis for UI
  // percentages. They should be provider & model specific instead of being
  // wild guesses.
  const lower = model.toLowerCase();
  if (lower.includes("32k")) {
    return 32000;
  }
  if (lower.includes("16k")) {
    return 16000;
  }
  if (lower.includes("8k")) {
    return 8000;
  }
  if (lower.includes("4k")) {
    return 4000;
  }
  return 128000; // Default to 128k for any other model.
}

/** Calculates the percentage of tokens remaining in context for a model. */
export function calculateContextPercentRemaining(
  items: Array<ResponseItem>,
  model: string,
): number {
  const used = approximateTokensUsed(items);
  const max = maxTokensForModel(model);
  const remaining = Math.max(0, max - used);
  return (remaining / max) * 100;
}
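
// Worked example (the token count is an illustrative assumption, not a
// measurement): a model whose name contains "8k" maps to an 8,000-token
// window, so a transcript that approximateTokensUsed() scores at 1,000
// tokens leaves (8000 - 1000) / 8000 * 100 = 87.5% of the context:
//
//   const pct = calculateContextPercentRemaining(transcript, "my-model-8k");
//   // pct === 87.5 when approximateTokensUsed(transcript) === 1000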

/**
 * Type guard that narrows a {@link ResponseItem} to one that represents a
 * user-authored message. The OpenAI SDK represents both input *and* output
 * messages with a discriminated union where:
 *   - `type` is the string literal "message" and
 *   - `role` is one of "user" | "assistant" | "system" | "developer".
 *
 * For the purposes of deduplication we only care about *user* messages so we
 * detect those here in a single, reusable helper.
 */
function isUserMessage(
  item: ResponseItem,
): item is ResponseItem & { type: "message"; role: "user"; content: unknown } {
  return item.type === "message" && (item as { role?: string }).role === "user";
}

/**
 * Deduplicate the stream of {@link ResponseItem}s before they are persisted in
 * component state.
 *
 * Historically we used the (optional) {@code id} field returned by the
 * OpenAI streaming API as the primary key: the first occurrence of any given
 * {@code id} won and subsequent duplicates were dropped. In practice this
 * proved brittle because locally-generated user messages don't include an
 * {@code id}. The result was that if a user quickly pressed <Enter> twice the
 * exact same message would appear twice in the transcript.
 *
 * The new rules are therefore:
 *   1. If a {@link ResponseItem} has an {@code id}, keep only the *first*
 *      occurrence of that {@code id} (this retains the previous behaviour for
 *      assistant / tool messages).
 *   2. Additionally, collapse *consecutive* user messages with identical
 *      content. Two messages are considered identical when their serialized
 *      {@code content} array matches exactly. We purposefully restrict this
 *      to **adjacent** duplicates so that legitimately repeated questions at
 *      a later point in the conversation are still shown.
 */
export function uniqueById(items: Array<ResponseItem>): Array<ResponseItem> {
  const seenIds = new Set<string>();
  const deduped: Array<ResponseItem> = [];

  for (const item of items) {
    // ──────────────────────────────────────────────────────────────────
    // Rule #1 – deduplicate by id when present
    // ──────────────────────────────────────────────────────────────────
    if (typeof item.id === "string" && item.id.length > 0) {
      if (seenIds.has(item.id)) {
        continue; // skip duplicates
      }
      seenIds.add(item.id);
    }

    // ──────────────────────────────────────────────────────────────────
    // Rule #2 – collapse consecutive identical user messages
    // ──────────────────────────────────────────────────────────────────
    if (isUserMessage(item) && deduped.length > 0) {
      const prev = deduped[deduped.length - 1]!;
      if (
        isUserMessage(prev) &&
        // Note: the `content` field is an array of message parts. Performing
        // a deep compare is overkill here; serialising to JSON is sufficient
        // (and fast for the tiny payloads involved).
        JSON.stringify(prev.content) === JSON.stringify(item.content)
      ) {
        continue; // skip duplicate user message
      }
    }

    deduped.push(item);
  }

  return deduped;
}
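
// Illustrative exercise of both rules. Constructing full ResponseItem values
// is verbose, so the shapes below are hand-rolled and cast for the example
// only (an assumption, not the SDK's exact types):
const demoItems = [
  { id: "a", type: "message", role: "assistant", content: [] },
  // Dropped by rule #1: same id as the item above.
  { id: "a", type: "message", role: "assistant", content: [] },
  { type: "message", role: "user", content: [{ type: "input_text", text: "hi" }] },
  // Dropped by rule #2: adjacent user message with identical content.
  { type: "message", role: "user", content: [{ type: "input_text", text: "hi" }] },
] as unknown as Array<ResponseItem>;

// uniqueById(demoItems) keeps the first assistant message and one copy of the
// user message, i.e. uniqueById(demoItems).length === 2.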