bug: non-openai mode - fix for gemini content: null, fix 429 to throw before stream (#563)

Gemini's API is finicky: it returns a 400 without an error message when you pass `content: null`. Also fixed the rate-limiting issues by throwing outside of the iterator. I think there's a separate issue with the second isRateLimit check in agent-loop: turnInput is cleared by that time, so it retries without the last message.
2025-04-22 20:37:48 -04:00
parent 20b6ef0de8
commit 23f0887df3
2 changed files with 273 additions and 319 deletions
--- a/codex-cli/src/utils/responses.ts
+++ b/codex-cli/src/utils/responses.ts
@@ -3,6 +3,7 @@ import type {
  ResponseCreateParams,
  Response,
 } from "openai/resources/responses/responses";
+
 // Define interfaces based on OpenAI API documentation
 type ResponseCreateInput = ResponseCreateParams;
 type ResponseOutput = Response;
@@ -260,31 +261,7 @@ function convertTools(
    }));
 }

-// Main function with overloading
-async function responsesCreateViaChatCompletions(
-  openai: OpenAI,
-  input: ResponseCreateInput & { stream: true },
-): Promise<AsyncGenerator<ResponseEvent>>;
-async function responsesCreateViaChatCompletions(
-  openai: OpenAI,
-  input: ResponseCreateInput & { stream?: false },
-): Promise<ResponseOutput>;
-async function responsesCreateViaChatCompletions(
-  openai: OpenAI,
-  input: ResponseCreateInput,
-): Promise<ResponseOutput | AsyncGenerator<ResponseEvent>> {
-  if (input.stream) {
-    return streamResponses(openai, input);
-  } else {
-    return nonStreamResponses(openai, input);
-  }
-}
-
-// Non-streaming implementation
-async function nonStreamResponses(
-  openai: OpenAI,
-  input: ResponseCreateInput,
-): Promise<ResponseOutput> {
+const createCompletion = (openai: OpenAI, input: ResponseCreateInput) => {
  const fullMessages = getFullMessages(input);
  const chatTools = convertTools(input.tools);
  const webSearchOptions = input.tools?.some(
@@ -298,17 +275,55 @@ async function nonStreamResponses(
    messages: fullMessages,
    tools: chatTools,
    web_search_options: webSearchOptions,
-    temperature: input.temperature,
-    top_p: input.top_p,
+    temperature: input.temperature ?? 1.0,
+    top_p: input.top_p ?? 1.0,
    tool_choice: (input.tool_choice === "auto"
      ? "auto"
      : input.tool_choice) as OpenAI.Chat.Completions.ChatCompletionCreateParams["tool_choice"],
+    stream: input.stream || false,
    user: input.user,
    metadata: input.metadata,
  };

+  return openai.chat.completions.create(chatInput);
+};
+
+// Main function with overloading
+async function responsesCreateViaChatCompletions(
+  openai: OpenAI,
+  input: ResponseCreateInput & { stream: true },
+): Promise<AsyncGenerator<ResponseEvent>>;
+async function responsesCreateViaChatCompletions(
+  openai: OpenAI,
+  input: ResponseCreateInput & { stream?: false },
+): Promise<ResponseOutput>;
+async function responsesCreateViaChatCompletions(
+  openai: OpenAI,
+  input: ResponseCreateInput,
+): Promise<ResponseOutput | AsyncGenerator<ResponseEvent>> {
+  const completion = await createCompletion(openai, input);
+  if (input.stream) {
+    return streamResponses(
+      input,
+      completion as AsyncIterable<OpenAI.ChatCompletionChunk>,
+    );
+  } else {
+    return nonStreamResponses(
+      input,
+      completion as unknown as OpenAI.Chat.Completions.ChatCompletion,
+    );
+  }
+}
+
+// Non-streaming implementation
+async function nonStreamResponses(
+  input: ResponseCreateInput,
+  completion: OpenAI.Chat.Completions.ChatCompletion,
+): Promise<ResponseOutput> {
+  const fullMessages = getFullMessages(input);
+
  try {
-    const chatResponse = await openai.chat.completions.create(chatInput);
+    const chatResponse = completion;
    if (!("choices" in chatResponse) || chatResponse.choices.length === 0) {
      throw new Error("No choices in chat completion response");
    }
@@ -429,37 +444,11 @@ async function nonStreamResponses(

 // Streaming implementation
 async function* streamResponses(
-  openai: OpenAI,
  input: ResponseCreateInput,
+  completion: AsyncIterable<OpenAI.ChatCompletionChunk>,
 ): AsyncGenerator<ResponseEvent> {
  const fullMessages = getFullMessages(input);
-  const chatTools = convertTools(input.tools);
-  const webSearchOptions = input.tools?.some(
-    (tool) => tool.type === "function" && tool.name === "web_search",
-  )
-    ? {}
-    : undefined;

-  const chatInput: OpenAI.Chat.Completions.ChatCompletionCreateParams = {
-    model: input.model,
-    messages: fullMessages,
-    tools: chatTools,
-    web_search_options: webSearchOptions,
-    temperature: input.temperature ?? 1.0,
-    top_p: input.top_p ?? 1.0,
-    tool_choice: (input.tool_choice === "auto"
-      ? "auto"
-      : input.tool_choice) as OpenAI.Chat.Completions.ChatCompletionCreateParams["tool_choice"],
-    stream: true,
-    user: input.user,
-    metadata: input.metadata,
-  };
-
-  try {
-    // console.error("chatInput", JSON.stringify(chatInput));
-    const stream = await openai.chat.completions.create(chatInput);
-
-    // Initialize state
  const responseId = generateId("resp");
  const outputItemId = generateId("msg");
  let textContentAdded = false;
@@ -496,7 +485,7 @@ async function* streamResponses(
  yield { type: "response.created", response: initialResponse };
  yield { type: "response.in_progress", response: initialResponse };
  let isToolCall = false;
-    for await (const chunk of stream as AsyncIterable<OpenAI.ChatCompletionChunk>) {
+  for await (const chunk of completion as AsyncIterable<OpenAI.ChatCompletionChunk>) {
    // console.error('\nCHUNK: ', JSON.stringify(chunk));
    const choice = chunk.choices[0];
    if (!choice) {
@@ -676,11 +665,15 @@ async function* streamResponses(
    } as ResponseOutput;

    // Store history
-      const assistantMessage = {
+    const assistantMessage: OpenAI.Chat.Completions.ChatCompletionMessageParam =
+      {
        role: "assistant" as const,
-        content: textContent || null,
      };

+    if (textContent) {
+      assistantMessage.content = textContent;
+    }
+
    // Add tool_calls property if needed
    if (toolCalls.size > 0) {
      const toolCallsArray = Array.from(toolCalls.values()).map((tc) => ({
@@ -714,18 +707,6 @@ async function* streamResponses(

    yield { type: "response.completed", response: finalResponse };
  }
-  } catch (error) {
-    // console.error('\nERROR: ', JSON.stringify(error));
-    yield {
-      type: "error",
-      code:
-        error instanceof Error && "code" in error
-          ? (error as { code: string }).code
-          : "unknown",
-      message: error instanceof Error ? error.message : String(error),
-      param: null,
-    };
-  }
 }

 export {
--- a/codex-cli/tests/responses-chat-completions.test.ts
+++ b/codex-cli/tests/responses-chat-completions.test.ts
@@ -294,7 +294,7 @@ describe("responsesCreateViaChatCompletions", () => {
        expect(callArgs.messages).toEqual([
          { role: "user", content: "Hello world" },
        ]);
-        expect(callArgs.stream).toBeUndefined();
+        expect(callArgs.stream).toBe(false);
      }

      // Verify result format
@@ -736,33 +736,6 @@ describe("responsesCreateViaChatCompletions", () => {
      }
    });

-    it("should handle errors gracefully", async () => {
-      // Setup mock to throw an error
-      openAiState.createSpy = vi
-        .fn()
-        .mockRejectedValue(new Error("API connection error"));
-
-      const openaiClient = new (await import("openai")).default({
-        apiKey: "test-key",
-      }) as unknown as OpenAI;
-
-      const inputMessage = createTestInput({
-        model: "gpt-4o",
-        userMessage: "Test message",
-        stream: false,
-      });
-
-      // Expect the function to throw an error
-      await expect(
-        responsesModule.responsesCreateViaChatCompletions(
-          openaiClient,
-          inputMessage as unknown as ResponseCreateParamsNonStreaming & {
-            stream?: false | undefined;
-          },
-        ),
-      ).rejects.toThrow("Failed to process chat completion");
-    });
-
    it("handles streaming with tool calls", async () => {
      // Mock a streaming response with tool calls
      const mockStream = createToolCallsStream();