diff --git a/sdk/typescript/README.md b/sdk/typescript/README.md index 1aeffce6..df368923 100644 --- a/sdk/typescript/README.md +++ b/sdk/typescript/README.md @@ -83,6 +83,18 @@ const turn = await thread.run("Summarize repository status", { console.log(turn.finalResponse); ``` +### Attaching images + +Provide structured input entries when you need to include images alongside text. Text entries are joined in order, separated by a blank line, to form the final prompt, while image entries are passed to the Codex CLI via `--image`. + +```typescript +const turn = await thread.run([ + { type: "text", text: "Describe these screenshots" }, + { type: "local_image", path: "./ui.png" }, + { type: "local_image", path: "./diagram.jpg" }, +]); +``` + ### Resuming an existing thread Threads are persisted in `~/.codex/sessions`. If you lose the in-memory `Thread` object, reconstruct it with `resumeThread()` and keep going. @@ -95,7 +107,7 @@ await thread.run("Implement the fix"); ### Working directory controls -Codex runs in the current working directory by default. To avoid unrecoverable errors, Codex requires the working directory to be a Git repository. You can skip the Git repository check by passing the `skipGitRepoCheck` option when creating a thread. +Codex runs in the current working directory by default. To avoid unrecoverable errors, Codex requires the working directory to be a Git repository. You can skip the Git repository check by passing the `skipGitRepoCheck` option when creating a thread. 
```typescript const thread = codex.startThread({ diff --git a/sdk/typescript/src/exec.ts b/sdk/typescript/src/exec.ts index b1cf4c41..45ae5fc6 100644 --- a/sdk/typescript/src/exec.ts +++ b/sdk/typescript/src/exec.ts @@ -11,6 +11,7 @@ export type CodexExecArgs = { baseUrl?: string; apiKey?: string; threadId?: string | null; + images?: string[]; // --model model?: string; // --sandbox @@ -55,6 +56,12 @@ export class CodexExec { commandArgs.push("--output-schema", args.outputSchemaFile); } + if (args.images?.length) { + for (const image of args.images) { + commandArgs.push("--image", image); + } + } + if (args.threadId) { commandArgs.push("resume", args.threadId); } diff --git a/sdk/typescript/src/index.ts b/sdk/typescript/src/index.ts index 6e1ace9d..8e6a71df 100644 --- a/sdk/typescript/src/index.ts +++ b/sdk/typescript/src/index.ts @@ -24,7 +24,7 @@ export type { } from "./items"; export { Thread } from "./thread"; -export type { RunResult, RunStreamedResult, Input } from "./thread"; +export type { RunResult, RunStreamedResult, Input, UserInput } from "./thread"; export { Codex } from "./codex"; diff --git a/sdk/typescript/src/thread.ts b/sdk/typescript/src/thread.ts index 0caf52b4..2306df25 100644 --- a/sdk/typescript/src/thread.ts +++ b/sdk/typescript/src/thread.ts @@ -25,7 +25,17 @@ export type StreamedTurn = { export type RunStreamedResult = StreamedTurn; /** An input to send to the agent. */ -export type Input = string; +export type UserInput = + | { + type: "text"; + text: string; + } + | { + type: "local_image"; + path: string; + }; + +export type Input = string | UserInput[]; /** Represent a thread of conversation with the agent. One thread can have multiple consecutive turns. */ export class Thread { @@ -53,21 +63,23 @@ export class Thread { } /** Provides the input to the agent and streams events as they are produced during the turn. 
*/ - async runStreamed(input: string, turnOptions: TurnOptions = {}): Promise { + async runStreamed(input: Input, turnOptions: TurnOptions = {}): Promise { return { events: this.runStreamedInternal(input, turnOptions) }; } private async *runStreamedInternal( - input: string, + input: Input, turnOptions: TurnOptions = {}, ): AsyncGenerator { const { schemaPath, cleanup } = await createOutputSchemaFile(turnOptions.outputSchema); const options = this._threadOptions; + const { prompt, images } = normalizeInput(input); const generator = this._exec.run({ - input, + input: prompt, baseUrl: this._options.baseUrl, apiKey: this._options.apiKey, threadId: this._id, + images, model: options?.model, sandboxMode: options?.sandboxMode, workingDirectory: options?.workingDirectory, @@ -93,7 +105,7 @@ export class Thread { } /** Provides the input to the agent and returns the completed turn. */ - async run(input: string, turnOptions: TurnOptions = {}): Promise { + async run(input: Input, turnOptions: TurnOptions = {}): Promise { const generator = this.runStreamedInternal(input, turnOptions); const items: ThreadItem[] = []; let finalResponse: string = ""; @@ -118,3 +130,19 @@ export class Thread { return { items, finalResponse, usage }; } } + +function normalizeInput(input: Input): { prompt: string; images: string[] } { + if (typeof input === "string") { + return { prompt: input, images: [] }; + } + const promptParts: string[] = []; + const images: string[] = []; + for (const item of input) { + if (item.type === "text") { + promptParts.push(item.text); + } else if (item.type === "local_image") { + images.push(item.path); + } + } + return { prompt: promptParts.join("\n\n"), images }; +} diff --git a/sdk/typescript/tests/run.test.ts b/sdk/typescript/tests/run.test.ts index de7ef555..f495b1e6 100644 --- a/sdk/typescript/tests/run.test.ts +++ b/sdk/typescript/tests/run.test.ts @@ -279,6 +279,82 @@ describe("Codex", () => { await close(); } }); + it("combines structured text input 
segments", async () => { + const { url, close, requests } = await startResponsesTestProxy({ + statusCode: 200, + responseBodies: [ + sse( + responseStarted("response_1"), + assistantMessage("Combined input applied", "item_1"), + responseCompleted("response_1"), + ), + ], + }); + + try { + const client = new Codex({ codexPathOverride: codexExecPath, baseUrl: url, apiKey: "test" }); + + const thread = client.startThread(); + await thread.run([ + { type: "text", text: "Describe file changes" }, + { type: "text", text: "Focus on impacted tests" }, + ]); + + const payload = requests[0]; + expect(payload).toBeDefined(); + const lastUser = payload!.json.input.at(-1); + expect(lastUser?.content?.[0]?.text).toBe("Describe file changes\n\nFocus on impacted tests"); + } finally { + await close(); + } + }); + it("forwards images to exec", async () => { + const { url, close } = await startResponsesTestProxy({ + statusCode: 200, + responseBodies: [ + sse( + responseStarted("response_1"), + assistantMessage("Images applied", "item_1"), + responseCompleted("response_1"), + ), + ], + }); + + const { args: spawnArgs, restore } = codexExecSpy(); + const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "codex-images-")); + const imagesDirectoryEntries: [string, string] = [ + path.join(tempDir, "first.png"), + path.join(tempDir, "second.jpg"), + ]; + imagesDirectoryEntries.forEach((image, index) => { + fs.writeFileSync(image, `image-${index}`); + }); + + try { + const client = new Codex({ codexPathOverride: codexExecPath, baseUrl: url, apiKey: "test" }); + + const thread = client.startThread(); + await thread.run([ + { type: "text", text: "describe the images" }, + { type: "local_image", path: imagesDirectoryEntries[0] }, + { type: "local_image", path: imagesDirectoryEntries[1] }, + ]); + + const commandArgs = spawnArgs[0]; + expect(commandArgs).toBeDefined(); + const forwardedImages: string[] = []; + for (let i = 0; i < commandArgs!.length; i += 1) { + if (commandArgs![i] === 
"--image") { + forwardedImages.push(commandArgs![i + 1] ?? ""); + } + } + expect(forwardedImages).toEqual(imagesDirectoryEntries); + } finally { + fs.rmSync(tempDir, { recursive: true, force: true }); + restore(); + await close(); + } + }); it("runs in provided working directory", async () => { const { url, close } = await startResponsesTestProxy({ statusCode: 200,