feat: add images support to the Codex TypeScript SDK (#5281)
Extend `run` and `runStreamed` input to be either a `string` or
structured input. A structured input is an array of text parts and/or
image paths, which will then be fed to the CLI through the `--image`
argument. Text parts are combined with double newlines. For instance:
```ts
const turn = await thread.run([
{ type: "text", text: "Describe these screenshots" },
{ type: "local_image", path: "./ui.png" },
{ type: "local_image", path: "./diagram.jpg" },
{ type: "text", text: "Thanks!" },
]);
```
This ends up launching the CLI with:
```
codex exec --image ./ui.png --image ./diagram.jpg "Describe these screenshots\n\nThanks!"
```
The complete `Input` type for both functions is now:
```ts
export type UserInput =
| {
type: "text";
text: string;
}
| {
type: "local_image";
path: string;
};
export type Input = string | UserInput[];
```
This brings the Codex SDK closer to feature parity with the CLI.
Addresses #5280.
This commit is contained in:
@@ -83,6 +83,18 @@ const turn = await thread.run("Summarize repository status", {
|
||||
console.log(turn.finalResponse);
|
||||
```
|
||||
|
||||
### Attaching images
|
||||
|
||||
Provide structured input entries when you need to include images alongside text. Text entries are concatenated into the final prompt while image entries are passed to the Codex CLI via `--image`.
|
||||
|
||||
```typescript
|
||||
const turn = await thread.run([
|
||||
{ type: "text", text: "Describe these screenshots" },
|
||||
{ type: "local_image", path: "./ui.png" },
|
||||
{ type: "local_image", path: "./diagram.jpg" },
|
||||
]);
|
||||
```
|
||||
|
||||
### Resuming an existing thread
|
||||
|
||||
Threads are persisted in `~/.codex/sessions`. If you lose the in-memory `Thread` object, reconstruct it with `resumeThread()` and keep going.
|
||||
@@ -95,7 +107,7 @@ await thread.run("Implement the fix");
|
||||
|
||||
### Working directory controls
|
||||
|
||||
Codex runs in the current working directory by default. To avoid unrecoverable errors, Codex requires the working directory to be a Git repository. You can skip the Git repository check by passing the `skipGitRepoCheck` option when creating a thread.
|
||||
Codex runs in the current working directory by default. To avoid unrecoverable errors, Codex requires the working directory to be a Git repository. You can skip the Git repository check by passing the `skipGitRepoCheck` option when creating a thread.
|
||||
|
||||
```typescript
|
||||
const thread = codex.startThread({
|
||||
|
||||
@@ -11,6 +11,7 @@ export type CodexExecArgs = {
|
||||
baseUrl?: string;
|
||||
apiKey?: string;
|
||||
threadId?: string | null;
|
||||
images?: string[];
|
||||
// --model
|
||||
model?: string;
|
||||
// --sandbox
|
||||
@@ -55,6 +56,12 @@ export class CodexExec {
|
||||
commandArgs.push("--output-schema", args.outputSchemaFile);
|
||||
}
|
||||
|
||||
if (args.images?.length) {
|
||||
for (const image of args.images) {
|
||||
commandArgs.push("--image", image);
|
||||
}
|
||||
}
|
||||
|
||||
if (args.threadId) {
|
||||
commandArgs.push("resume", args.threadId);
|
||||
}
|
||||
|
||||
@@ -24,7 +24,7 @@ export type {
|
||||
} from "./items";
|
||||
|
||||
export { Thread } from "./thread";
|
||||
export type { RunResult, RunStreamedResult, Input } from "./thread";
|
||||
export type { RunResult, RunStreamedResult, Input, UserInput } from "./thread";
|
||||
|
||||
export { Codex } from "./codex";
|
||||
|
||||
|
||||
@@ -25,7 +25,17 @@ export type StreamedTurn = {
|
||||
export type RunStreamedResult = StreamedTurn;
|
||||
|
||||
/** An input to send to the agent. */
|
||||
export type Input = string;
|
||||
export type UserInput =
|
||||
| {
|
||||
type: "text";
|
||||
text: string;
|
||||
}
|
||||
| {
|
||||
type: "local_image";
|
||||
path: string;
|
||||
};
|
||||
|
||||
export type Input = string | UserInput[];
|
||||
|
||||
/** Represents a thread of conversation with the agent. One thread can have multiple consecutive turns. */
|
||||
export class Thread {
|
||||
@@ -53,21 +63,23 @@ export class Thread {
|
||||
}
|
||||
|
||||
/** Provides the input to the agent and streams events as they are produced during the turn. */
|
||||
async runStreamed(input: string, turnOptions: TurnOptions = {}): Promise<StreamedTurn> {
|
||||
async runStreamed(input: Input, turnOptions: TurnOptions = {}): Promise<StreamedTurn> {
|
||||
return { events: this.runStreamedInternal(input, turnOptions) };
|
||||
}
|
||||
|
||||
private async *runStreamedInternal(
|
||||
input: string,
|
||||
input: Input,
|
||||
turnOptions: TurnOptions = {},
|
||||
): AsyncGenerator<ThreadEvent> {
|
||||
const { schemaPath, cleanup } = await createOutputSchemaFile(turnOptions.outputSchema);
|
||||
const options = this._threadOptions;
|
||||
const { prompt, images } = normalizeInput(input);
|
||||
const generator = this._exec.run({
|
||||
input,
|
||||
input: prompt,
|
||||
baseUrl: this._options.baseUrl,
|
||||
apiKey: this._options.apiKey,
|
||||
threadId: this._id,
|
||||
images,
|
||||
model: options?.model,
|
||||
sandboxMode: options?.sandboxMode,
|
||||
workingDirectory: options?.workingDirectory,
|
||||
@@ -93,7 +105,7 @@ export class Thread {
|
||||
}
|
||||
|
||||
/** Provides the input to the agent and returns the completed turn. */
|
||||
async run(input: string, turnOptions: TurnOptions = {}): Promise<Turn> {
|
||||
async run(input: Input, turnOptions: TurnOptions = {}): Promise<Turn> {
|
||||
const generator = this.runStreamedInternal(input, turnOptions);
|
||||
const items: ThreadItem[] = [];
|
||||
let finalResponse: string = "";
|
||||
@@ -118,3 +130,19 @@ export class Thread {
|
||||
return { items, finalResponse, usage };
|
||||
}
|
||||
}
|
||||
|
||||
function normalizeInput(input: Input): { prompt: string; images: string[] } {
|
||||
if (typeof input === "string") {
|
||||
return { prompt: input, images: [] };
|
||||
}
|
||||
const promptParts: string[] = [];
|
||||
const images: string[] = [];
|
||||
for (const item of input) {
|
||||
if (item.type === "text") {
|
||||
promptParts.push(item.text);
|
||||
} else if (item.type === "local_image") {
|
||||
images.push(item.path);
|
||||
}
|
||||
}
|
||||
return { prompt: promptParts.join("\n\n"), images };
|
||||
}
|
||||
|
||||
@@ -279,6 +279,82 @@ describe("Codex", () => {
|
||||
await close();
|
||||
}
|
||||
});
|
||||
it("combines structured text input segments", async () => {
|
||||
const { url, close, requests } = await startResponsesTestProxy({
|
||||
statusCode: 200,
|
||||
responseBodies: [
|
||||
sse(
|
||||
responseStarted("response_1"),
|
||||
assistantMessage("Combined input applied", "item_1"),
|
||||
responseCompleted("response_1"),
|
||||
),
|
||||
],
|
||||
});
|
||||
|
||||
try {
|
||||
const client = new Codex({ codexPathOverride: codexExecPath, baseUrl: url, apiKey: "test" });
|
||||
|
||||
const thread = client.startThread();
|
||||
await thread.run([
|
||||
{ type: "text", text: "Describe file changes" },
|
||||
{ type: "text", text: "Focus on impacted tests" },
|
||||
]);
|
||||
|
||||
const payload = requests[0];
|
||||
expect(payload).toBeDefined();
|
||||
const lastUser = payload!.json.input.at(-1);
|
||||
expect(lastUser?.content?.[0]?.text).toBe("Describe file changes\n\nFocus on impacted tests");
|
||||
} finally {
|
||||
await close();
|
||||
}
|
||||
});
|
||||
it("forwards images to exec", async () => {
|
||||
const { url, close } = await startResponsesTestProxy({
|
||||
statusCode: 200,
|
||||
responseBodies: [
|
||||
sse(
|
||||
responseStarted("response_1"),
|
||||
assistantMessage("Images applied", "item_1"),
|
||||
responseCompleted("response_1"),
|
||||
),
|
||||
],
|
||||
});
|
||||
|
||||
const { args: spawnArgs, restore } = codexExecSpy();
|
||||
const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "codex-images-"));
|
||||
const imagesDirectoryEntries: [string, string] = [
|
||||
path.join(tempDir, "first.png"),
|
||||
path.join(tempDir, "second.jpg"),
|
||||
];
|
||||
imagesDirectoryEntries.forEach((image, index) => {
|
||||
fs.writeFileSync(image, `image-${index}`);
|
||||
});
|
||||
|
||||
try {
|
||||
const client = new Codex({ codexPathOverride: codexExecPath, baseUrl: url, apiKey: "test" });
|
||||
|
||||
const thread = client.startThread();
|
||||
await thread.run([
|
||||
{ type: "text", text: "describe the images" },
|
||||
{ type: "local_image", path: imagesDirectoryEntries[0] },
|
||||
{ type: "local_image", path: imagesDirectoryEntries[1] },
|
||||
]);
|
||||
|
||||
const commandArgs = spawnArgs[0];
|
||||
expect(commandArgs).toBeDefined();
|
||||
const forwardedImages: string[] = [];
|
||||
for (let i = 0; i < commandArgs!.length; i += 1) {
|
||||
if (commandArgs![i] === "--image") {
|
||||
forwardedImages.push(commandArgs![i + 1] ?? "");
|
||||
}
|
||||
}
|
||||
expect(forwardedImages).toEqual(imagesDirectoryEntries);
|
||||
} finally {
|
||||
fs.rmSync(tempDir, { recursive: true, force: true });
|
||||
restore();
|
||||
await close();
|
||||
}
|
||||
});
|
||||
it("runs in provided working directory", async () => {
|
||||
const { url, close } = await startResponsesTestProxy({
|
||||
statusCode: 200,
|
||||
|
||||
Reference in New Issue
Block a user