feat: add images support to the Codex Typescript SDK (#5281)
Extend `run` and `runStreamed` input to be either a `string` or
structured input. A structured input is an array of text parts and/or
image paths, which will then be fed to the CLI through the `--image`
argument. Text parts are combined with double newlines. For instance:
```ts
const turn = await thread.run([
{ type: "text", text: "Describe these screenshots" },
{ type: "local_image", path: "./ui.png" },
{ type: "local_image", path: "./diagram.jpg" },
{ type: "text", text: "Thanks!" },
]);
```
Ends up launching the CLI with:
```
codex exec --image ./ui.png --image ./diagram.jpg "Describe these screenshots\n\nThanks!"
```
The complete `Input` type for both functions is now:
```ts
export type UserInput =
| {
type: "text";
text: string;
}
| {
type: "local_image";
path: string;
};
export type Input = string | UserInput[];
```
This brings the Codex SDK closer to feature parity with the CLI.
Addresses #5280.
This commit is contained in:
@@ -279,6 +279,82 @@ describe("Codex", () => {
|
||||
await close();
|
||||
}
|
||||
});
|
||||
// Verifies that several `text` parts passed as structured input reach the
// request payload as a single string joined by a blank line ("\n\n").
it("combines structured text input segments", async () => {
  const proxy = await startResponsesTestProxy({
    statusCode: 200,
    responseBodies: [
      sse(
        responseStarted("response_1"),
        assistantMessage("Combined input applied", "item_1"),
        responseCompleted("response_1"),
      ),
    ],
  });

  try {
    const codex = new Codex({
      codexPathOverride: codexExecPath,
      baseUrl: proxy.url,
      apiKey: "test",
    });

    await codex.startThread().run([
      { type: "text", text: "Describe file changes" },
      { type: "text", text: "Focus on impacted tests" },
    ]);

    // The first request captured by the proxy carries the user turn.
    const firstRequest = proxy.requests[0];
    expect(firstRequest).toBeDefined();

    // The last input item is the user message that was just sent.
    const userMessage = firstRequest!.json.input.at(-1);
    expect(userMessage?.content?.[0]?.text).toBe(
      "Describe file changes\n\nFocus on impacted tests",
    );
  } finally {
    // Always shut the proxy down, even when an expectation fails.
    await proxy.close();
  }
});
|
||||
// Verifies that every `local_image` part of a structured input is handed to
// the spawned CLI as a `--image <path>` argument, in the order given.
it("forwards images to exec", async () => {
  const { url, close } = await startResponsesTestProxy({
    statusCode: 200,
    responseBodies: [
      sse(
        responseStarted("response_1"),
        assistantMessage("Images applied", "item_1"),
        responseCompleted("response_1"),
      ),
    ],
  });

  const { args: spawnArgs, restore } = codexExecSpy();

  // Assigned inside the `try` so that a failure while creating the fixtures
  // still reaches the cleanup in `finally` (spy restored, proxy closed).
  let tempDir: string | undefined;

  try {
    tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "codex-images-"));

    // Tuple type keeps index access typed as `string` rather than
    // `string | undefined` under `noUncheckedIndexedAccess`.
    const imagesDirectoryEntries: [string, string] = [
      path.join(tempDir, "first.png"),
      path.join(tempDir, "second.jpg"),
    ];
    imagesDirectoryEntries.forEach((image, index) => {
      fs.writeFileSync(image, `image-${index}`);
    });

    const client = new Codex({ codexPathOverride: codexExecPath, baseUrl: url, apiKey: "test" });

    const thread = client.startThread();
    await thread.run([
      { type: "text", text: "describe the images" },
      { type: "local_image", path: imagesDirectoryEntries[0] },
      { type: "local_image", path: imagesDirectoryEntries[1] },
    ]);

    const commandArgs = spawnArgs[0];
    expect(commandArgs).toBeDefined();

    // Collect the value that follows each `--image` flag; a flag with no
    // following value is recorded as "" so the comparison below fails loudly.
    const forwardedImages: string[] = [];
    for (let i = 0; i < commandArgs!.length; i += 1) {
      if (commandArgs![i] === "--image") {
        forwardedImages.push(commandArgs![i + 1] ?? "");
      }
    }
    expect(forwardedImages).toEqual(imagesDirectoryEntries);
  } finally {
    // Nested so that a failure while removing the temp directory cannot skip
    // restoring the spy or closing the proxy.
    try {
      if (tempDir !== undefined) {
        fs.rmSync(tempDir, { recursive: true, force: true });
      }
    } finally {
      restore();
      await close();
    }
  }
});
|
||||
it("runs in provided working directory", async () => {
|
||||
const { url, close } = await startResponsesTestProxy({
|
||||
statusCode: 200,
|
||||
|
||||
Reference in New Issue
Block a user