feat: add images support to the Codex TypeScript SDK (#5281)

Extend `run` and `runStreamed` input to be either a `string` or
structured input. A structured input is an array of text parts and/or
image paths, which will then be fed to the CLI through the `--image`
argument. Text parts are combined with double newlines. For instance:

```ts
const turn = await thread.run([
  { type: "text", text: "Describe these screenshots" },
  { type: "local_image", path: "./ui.png" },
  { type: "local_image", path: "./diagram.jpg" },
  { type: "text", text: "Thanks!" },
]);
```

Ends up launching the CLI with:

```
codex exec --image ./ui.png --image ./diagram.jpg "Describe these screenshots\n\nThanks!"
```

The complete `Input` type for both functions is now:

```ts
export type UserInput =
  | {
      type: "text";
      text: string;
    }
  | {
      type: "local_image";
      path: string;
    };

export type Input = string | UserInput[];
```

This brings the Codex SDK closer to feature parity with the CLI.
Addresses #5280.
This commit is contained in:
needs
2025-10-20 18:54:59 +02:00
committed by GitHub
parent 540abfa05e
commit 3282e86a60
5 changed files with 130 additions and 7 deletions

View File

@@ -279,6 +279,82 @@ describe("Codex", () => {
await close();
}
});
it("combines structured text input segments", async () => {
  // Verifies that several `text` parts passed as structured input reach the
  // Responses endpoint as one user message, joined by a blank line.
  const {
    url: proxyUrl,
    close: shutdownProxy,
    requests: capturedRequests,
  } = await startResponsesTestProxy({
    statusCode: 200,
    responseBodies: [
      // Canned SSE stream returned for the single request this test issues.
      sse(
        responseStarted("response_1"),
        assistantMessage("Combined input applied", "item_1"),
        responseCompleted("response_1"),
      ),
    ],
  });

  try {
    const codex = new Codex({
      codexPathOverride: codexExecPath,
      baseUrl: proxyUrl,
      apiKey: "test",
    });
    const conversation = codex.startThread();

    await conversation.run([
      { type: "text", text: "Describe file changes" },
      { type: "text", text: "Focus on impacted tests" },
    ]);

    const firstRequest = capturedRequests[0];
    expect(firstRequest).toBeDefined();

    // The most recent input item is expected to carry the merged prompt text.
    const finalInputItem = firstRequest!.json.input.at(-1);
    const mergedPrompt = finalInputItem?.content?.[0]?.text;
    expect(mergedPrompt).toBe("Describe file changes\n\nFocus on impacted tests");
  } finally {
    // Always stop the proxy, even when an assertion above fails.
    await shutdownProxy();
  }
});
it("forwards images to exec", async () => {
  // Verifies that every `local_image` part of a structured input is handed to
  // the spawned command as an `--image <path>` pair, in the order given.
  //
  // Each resource (proxy, exec spy, temp directory) is acquired immediately
  // before its own try/finally so that a failure while setting up a later
  // resource, or while tearing down an earlier one, can never skip the
  // remaining cleanup. Teardown order is: temp dir, spy, proxy.
  const { url, close } = await startResponsesTestProxy({
    statusCode: 200,
    responseBodies: [
      sse(
        responseStarted("response_1"),
        assistantMessage("Images applied", "item_1"),
        responseCompleted("response_1"),
      ),
    ],
  });

  try {
    const { args: spawnArgs, restore } = codexExecSpy();

    try {
      const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "codex-images-"));

      try {
        // Placeholder files on disk so the paths passed to `run` exist.
        const imagePaths: [string, string] = [
          path.join(tempDir, "first.png"),
          path.join(tempDir, "second.jpg"),
        ];
        imagePaths.forEach((imagePath, index) => {
          fs.writeFileSync(imagePath, `image-${index}`);
        });

        const client = new Codex({
          codexPathOverride: codexExecPath,
          baseUrl: url,
          apiKey: "test",
        });
        const thread = client.startThread();
        await thread.run([
          { type: "text", text: "describe the images" },
          { type: "local_image", path: imagePaths[0] },
          { type: "local_image", path: imagePaths[1] },
        ]);

        // First argument list recorded by the spy.
        const commandArgs = spawnArgs[0];
        expect(commandArgs).toBeDefined();

        // Narrow once instead of repeating non-null assertions; the
        // expectation above has already failed the test if it was undefined.
        const argv = commandArgs ?? [];

        // Collect the value that follows each `--image` flag. A flag with no
        // following value is recorded as "" so the comparison below fails
        // visibly instead of silently dropping the entry.
        const forwardedImages: string[] = [];
        for (let i = 0; i < argv.length; i += 1) {
          if (argv[i] === "--image") {
            forwardedImages.push(argv[i + 1] ?? "");
          }
        }
        expect(forwardedImages).toEqual(imagePaths);
      } finally {
        fs.rmSync(tempDir, { recursive: true, force: true });
      }
    } finally {
      restore();
    }
  } finally {
    await close();
  }
});
it("runs in provided working directory", async () => {
const { url, close } = await startResponsesTestProxy({
statusCode: 200,