diff --git a/codex-rs/exec/src/exec_events.rs b/codex-rs/exec/src/exec_events.rs index f52071d4..4ea57e19 100644 --- a/codex-rs/exec/src/exec_events.rs +++ b/codex-rs/exec/src/exec_events.rs @@ -2,34 +2,45 @@ use serde::Deserialize; use serde::Serialize; use ts_rs::TS; -/// Top-level events emitted on the Codex Exec thread stream. +/// Top-level JSONL events emitted by codex exec. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)] #[serde(tag = "type")] pub enum ThreadEvent { + /// Emitted when a new thread is started as the first event. #[serde(rename = "thread.started")] ThreadStarted(ThreadStartedEvent), + /// Emitted when a turn is started by sending a new prompt to the model. + /// A turn encompasses all events that happen while the agent is processing the prompt. #[serde(rename = "turn.started")] TurnStarted(TurnStartedEvent), + /// Emitted when a turn is completed. Typically right after the assistant's response. #[serde(rename = "turn.completed")] TurnCompleted(TurnCompletedEvent), + /// Indicates that a turn failed with an error. #[serde(rename = "turn.failed")] TurnFailed(TurnFailedEvent), + /// Emitted when a new item is added to the thread. Typically the item will be in an "in progress" state. #[serde(rename = "item.started")] ItemStarted(ItemStartedEvent), + /// Emitted when an item is updated. #[serde(rename = "item.updated")] ItemUpdated(ItemUpdatedEvent), + /// Signals that an item has reached a terminal state—either success or failure. #[serde(rename = "item.completed")] ItemCompleted(ItemCompletedEvent), + /// Represents an unrecoverable error emitted directly by the event stream. #[serde(rename = "error")] Error(ThreadErrorEvent), } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)] pub struct ThreadStartedEvent { + /// The identifier of the new thread. Can be used to resume the thread later. 
pub thread_id: String, } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS, Default)] + pub struct TurnStartedEvent {} #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)] @@ -42,11 +53,14 @@ pub struct TurnFailedEvent { pub error: ThreadErrorEvent, } -/// Minimal usage summary for a turn. +/// Describes the usage of tokens during a turn. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS, Default)] pub struct Usage { + /// The number of input tokens used during the turn. pub input_tokens: u64, + /// The number of cached input tokens used during the turn. pub cached_input_tokens: u64, + /// The number of output tokens used during the turn. pub output_tokens: u64, } @@ -83,34 +97,44 @@ pub struct ThreadItem { #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)] #[serde(tag = "item_type", rename_all = "snake_case")] pub enum ThreadItemDetails { + /// Response from the agent. + /// Either a natural-language response or a JSON string when structured output is requested. AssistantMessage(AssistantMessageItem), + /// Agent's reasoning summary. Reasoning(ReasoningItem), + /// Tracks a command executed by the agent. The item starts when the command is + /// spawned, and completes when the process exits with an exit code. CommandExecution(CommandExecutionItem), + /// Represents a set of file changes by the agent. The item is emitted only as a + /// completed event once the patch succeeds or fails. FileChange(FileChangeItem), + /// Represents a call to an MCP tool. The item starts when the invocation is + /// dispatched and completes when the MCP server reports success or failure. McpToolCall(McpToolCallItem), + /// Captures a web search request. It starts when the search is kicked off + /// and completes when results are returned to the agent. WebSearch(WebSearchItem), + /// Tracks the agent's running to-do list. 
It starts when the plan is first + /// issued, updates as steps change state, and completes when the turn ends. TodoList(TodoListItem), + /// Describes a non-fatal error surfaced as an item. Error(ErrorItem), } -/// Session metadata. -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)] -pub struct SessionItem { - pub session_id: String, -} - -/// Assistant message payload. +/// Response from the agent. +/// Either a natural-language response or a JSON string when structured output is requested. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)] pub struct AssistantMessageItem { pub text: String, } -/// Model reasoning summary payload. +/// Agent's reasoning summary. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)] pub struct ReasoningItem { pub text: String, } +/// The status of a command execution. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default, TS)] #[serde(rename_all = "snake_case")] pub enum CommandExecutionStatus { @@ -120,7 +144,7 @@ pub enum CommandExecutionStatus { Failed, } -/// Local shell command execution payload. +/// A command executed by the agent. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)] pub struct CommandExecutionItem { pub command: String, @@ -130,13 +154,14 @@ pub struct CommandExecutionItem { pub status: CommandExecutionStatus, } -/// Single file change summary for a patch. +/// A set of file changes by the agent. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)] pub struct FileUpdateChange { pub path: String, pub kind: PatchChangeKind, } +/// The status of a file change. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)] #[serde(rename_all = "snake_case")] pub enum PatchApplyStatus { @@ -144,14 +169,14 @@ pub enum PatchApplyStatus { Failed, } -/// Patch application payload. +/// A set of file changes by the agent. 
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)] pub struct FileChangeItem { pub changes: Vec, pub status: PatchApplyStatus, } -/// Known change kinds for a patch. +/// Indicates the type of the file change. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)] #[serde(rename_all = "snake_case")] pub enum PatchChangeKind { @@ -160,6 +185,7 @@ pub enum PatchChangeKind { Update, } +/// The status of an MCP tool call. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default, TS)] #[serde(rename_all = "snake_case")] pub enum McpToolCallStatus { @@ -169,6 +195,7 @@ pub enum McpToolCallStatus { Failed, } +/// A call to an MCP tool. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)] pub struct McpToolCallItem { pub server: String, @@ -176,16 +203,19 @@ pub struct McpToolCallItem { pub status: McpToolCallStatus, } +/// A web search request. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)] pub struct WebSearchItem { pub query: String, } +/// An error notification. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)] pub struct ErrorItem { pub message: String, } +/// An item in the agent's to-do list. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)] pub struct TodoItem { pub text: String, diff --git a/sdk/typescript/README.md b/sdk/typescript/README.md index b70c4f4a..19accb0b 100644 --- a/sdk/typescript/README.md +++ b/sdk/typescript/README.md @@ -1 +1,53 @@ # Codex SDK + +Bring the power of the best coding agent to your application. + +## Installation + +```bash +npm install @openai/codex-sdk +``` + +## Usage + +Call `startThread()` and `run()` to start a thread with Codex. + +```typescript +import { Codex } from "@openai/codex-sdk"; + +const codex = new Codex(); +const thread = codex.startThread(); +const result = await thread.run("Diagnose the test failure and propose a fix"); + +console.log(result); +``` + +You can call `run()` again to continue the same thread. 
+ +```typescript +const result = await thread.run("Implement the fix"); + +console.log(result); +``` + +### Streaming + +The `await run()` method completes when a thread turn is complete and the agent has prepared the final response. + +You can stream thread items while they are being produced by calling `await runStreamed()`. + +```typescript +const result = await thread.runStreamed("Diagnose the test failure and propose a fix"); +``` + +### Resuming a thread + +If you don't have the original `Thread` instance to continue the thread, you can resume a thread by calling `resumeThread()` and providing the thread id. + +```typescript +const threadId = "..."; +const thread = codex.resumeThread(threadId); +const result = await thread.run("Implement the fix"); + +console.log(result); +``` diff --git a/sdk/typescript/src/codex.ts b/sdk/typescript/src/codex.ts index 331b7e6e..8f871228 100644 --- a/sdk/typescript/src/codex.ts +++ b/sdk/typescript/src/codex.ts @@ -2,6 +2,11 @@ import { CodexOptions } from "./codexOptions"; import { CodexExec } from "./exec"; import { Thread } from "./thread"; +/** + * Codex is the main class for interacting with the Codex agent. + * + * Use the `startThread()` method to start a new thread or `resumeThread()` to resume a previously started thread. + */ export class Codex { private exec: CodexExec; private options: CodexOptions; @@ -11,10 +16,21 @@ export class Codex { this.options = options; } + /** + * Starts a new conversation with an agent. + * @returns A new thread instance. + */ startThread(): Thread { return new Thread(this.exec, this.options); } + /** + * Resumes a conversation with an agent based on the thread id. + * Threads are persisted in ~/.codex/sessions. + * + * @param id The id of the thread to resume. + * @returns A new thread instance. 
+ */ resumeThread(id: string): Thread { return new Thread(this.exec, this.options, id); } diff --git a/sdk/typescript/src/events.ts b/sdk/typescript/src/events.ts index 344e348a..b8adcfb4 100644 --- a/sdk/typescript/src/events.ts +++ b/sdk/typescript/src/events.ts @@ -2,55 +2,73 @@ import type { ThreadItem } from "./items"; +/** Emitted when a new thread is started as the first event. */ export type ThreadStartedEvent = { type: "thread.started"; + /** The identifier of the new thread. Can be used to resume the thread later. */ thread_id: string; }; +/** + * Emitted when a turn is started by sending a new prompt to the model. + * A turn encompasses all events that happen while the agent is processing the prompt. + */ export type TurnStartedEvent = { type: "turn.started"; }; +/** Describes the usage of tokens during a turn. */ export type Usage = { + /** The number of input tokens used during the turn. */ input_tokens: number; + /** The number of cached input tokens used during the turn. */ cached_input_tokens: number; + /** The number of output tokens used during the turn. */ output_tokens: number; }; +/** Emitted when a turn is completed. Typically right after the assistant's response. */ export type TurnCompletedEvent = { type: "turn.completed"; usage: Usage; }; +/** Indicates that a turn failed with an error. */ export type TurnFailedEvent = { type: "turn.failed"; error: ThreadError; }; +/** Emitted when a new item is added to the thread. Typically the item is initially "in progress". */ export type ItemStartedEvent = { type: "item.started"; item: ThreadItem; }; +/** Emitted when an item is updated. */ export type ItemUpdatedEvent = { type: "item.updated"; item: ThreadItem; }; +/** Signals that an item has reached a terminal state—either success or failure. */ export type ItemCompletedEvent = { type: "item.completed"; item: ThreadItem; }; +/** Fatal error emitted by the stream. 
*/ export type ThreadError = { message: string; }; +/** Represents an unrecoverable error emitted directly by the event stream. */ export type ThreadErrorEvent = { type: "error"; message: string; }; +/** Top-level JSONL events emitted by codex exec. */ export type ThreadEvent = | ThreadStartedEvent | TurnStartedEvent diff --git a/sdk/typescript/src/index.ts b/sdk/typescript/src/index.ts index 5fa6d5bc..f2f84d15 100644 --- a/sdk/typescript/src/index.ts +++ b/sdk/typescript/src/index.ts @@ -22,7 +22,8 @@ export type { ErrorItem, } from "./items"; -export { Thread, RunResult, RunStreamedResult, Input } from "./thread"; +export { Thread } from "./thread"; +export type { RunResult, RunStreamedResult, Input } from "./thread"; export { Codex } from "./codex"; diff --git a/sdk/typescript/src/items.ts b/sdk/typescript/src/items.ts index 546dd66f..85f54d78 100644 --- a/sdk/typescript/src/items.ts +++ b/sdk/typescript/src/items.ts @@ -1,71 +1,101 @@ // based on item types from codex-rs/exec/src/exec_events.rs +/** The status of a command execution. */ export type CommandExecutionStatus = "in_progress" | "completed" | "failed"; +/** A command executed by the agent. */ export type CommandExecutionItem = { id: string; item_type: "command_execution"; + /** The command line executed by the agent. */ command: string; + /** Aggregated stdout and stderr captured while the command was running. */ aggregated_output: string; + /** Set when the command exits; omitted while still running. */ exit_code?: number; + /** Current status of the command execution. */ status: CommandExecutionStatus; }; +/** Indicates the type of the file change. */ export type PatchChangeKind = "add" | "delete" | "update"; +/** A set of file changes by the agent. */ export type FileUpdateChange = { path: string; kind: PatchChangeKind; }; +/** The status of a file change. */ export type PatchApplyStatus = "completed" | "failed"; +/** A set of file changes by the agent. Emitted once the patch succeeds or fails. 
*/ export type FileChangeItem = { id: string; item_type: "file_change"; + /** Individual file changes that comprise the patch. */ changes: FileUpdateChange[]; + /** Whether the patch ultimately succeeded or failed. */ status: PatchApplyStatus; }; +/** The status of an MCP tool call. */ export type McpToolCallStatus = "in_progress" | "completed" | "failed"; +/** + * Represents a call to an MCP tool. The item starts when the invocation is dispatched + * and completes when the MCP server reports success or failure. + */ export type McpToolCallItem = { id: string; item_type: "mcp_tool_call"; + /** Name of the MCP server handling the request. */ server: string; + /** The tool invoked on the MCP server. */ tool: string; + /** Current status of the tool invocation. */ status: McpToolCallStatus; }; +/** Response from the agent. Either natural-language text or JSON when structured output is requested. */ export type AssistantMessageItem = { id: string; item_type: "assistant_message"; + /** Either natural-language text or JSON when structured output is requested. */ text: string; }; +/** Agent's reasoning summary. */ export type ReasoningItem = { id: string; item_type: "reasoning"; text: string; }; +/** Captures a web search request. Completes when results are returned to the agent. */ export type WebSearchItem = { id: string; item_type: "web_search"; query: string; }; +/** Describes a non-fatal error surfaced as an item. */ export type ErrorItem = { id: string; item_type: "error"; message: string; }; +/** An item in the agent's to-do list. */ export type TodoItem = { text: string; completed: boolean; }; +/** + * Tracks the agent's running to-do list. Starts when the plan is issued, updates as steps change, + * and completes when the turn ends. + */ export type TodoListItem = { id: string; item_type: "todo_list"; @@ -78,6 +108,7 @@ export type SessionItem = { session_id: string; }; +/** Canonical union of thread items and their type-specific payloads. 
*/ export type ThreadItem = | AssistantMessageItem | ReasoningItem diff --git a/sdk/typescript/src/thread.ts b/sdk/typescript/src/thread.ts index de2c2c94..7ec71021 100644 --- a/sdk/typescript/src/thread.ts +++ b/sdk/typescript/src/thread.ts @@ -4,29 +4,45 @@ import { CodexExec } from "./exec"; import { ThreadItem } from "./items"; import { TurnOptions } from "./turnOptions"; -export type RunResult = { +/** Completed turn. */ +export type Turn = { items: ThreadItem[]; finalResponse: string; }; -export type RunStreamedResult = { +/** Alias for `Turn` to describe the result of `run()`. */ +export type RunResult = Turn; + +/** The result of the `runStreamed` method. */ +export type StreamedTurn = { events: AsyncGenerator; }; +/** Alias for `StreamedTurn` to describe the result of `runStreamed()`. */ +export type RunStreamedResult = StreamedTurn; + +/** An input to send to the agent. */ export type Input = string; +/** Represents a thread of conversation with the agent. One thread can have multiple consecutive turns. */ export class Thread { - private exec: CodexExec; - private options: CodexOptions; - public id: string | null; + private _exec: CodexExec; + private _options: CodexOptions; + private _id: string | null; - constructor(exec: CodexExec, options: CodexOptions, id: string | null = null) { - this.exec = exec; - this.options = options; - this.id = id; + /** Returns the ID of the thread. Populated after the first turn starts. */ + public get id(): string | null { + return this._id; } - async runStreamed(input: string, options?: TurnOptions): Promise { + constructor(exec: CodexExec, options: CodexOptions, id: string | null = null) { + this._exec = exec; + this._options = options; + this._id = id; + } + + /** Provides the input to the agent and streams events as they are produced during the turn. 
*/ + async runStreamed(input: string, options?: TurnOptions): Promise { return { events: this.runStreamedInternal(input, options) }; } @@ -34,26 +50,32 @@ export class Thread { input: string, options?: TurnOptions, ): AsyncGenerator { - const generator = this.exec.run({ + const generator = this._exec.run({ input, - baseUrl: this.options.baseUrl, - apiKey: this.options.apiKey, - threadId: this.id, + baseUrl: this._options.baseUrl, + apiKey: this._options.apiKey, + threadId: this._id, model: options?.model, sandboxMode: options?.sandboxMode, workingDirectory: options?.workingDirectory, skipGitRepoCheck: options?.skipGitRepoCheck, }); for await (const item of generator) { - const parsed = JSON.parse(item) as ThreadEvent; + let parsed: ThreadEvent; + try { + parsed = JSON.parse(item) as ThreadEvent; + } catch (error) { + throw new Error(`Failed to parse item: ${item}`, { cause: error }); + } if (parsed.type === "thread.started") { - this.id = parsed.thread_id; + this._id = parsed.thread_id; } yield parsed; } } - async run(input: string, options?: TurnOptions): Promise { + /** Provides the input to the agent and returns the completed turn. */ + async run(input: string, options?: TurnOptions): Promise { const generator = this.runStreamedInternal(input, options); const items: ThreadItem[] = []; let finalResponse: string = ""; diff --git a/sdk/typescript/tests/run.test.ts b/sdk/typescript/tests/run.test.ts index 5b8de3c8..2f15237f 100644 --- a/sdk/typescript/tests/run.test.ts +++ b/sdk/typescript/tests/run.test.ts @@ -218,7 +218,6 @@ describe("Codex", () => { await close(); } }); - it("runs in provided working directory", async () => { const { url, close } = await startResponsesTestProxy({ statusCode: 200, @@ -286,7 +285,6 @@ describe("Codex", () => { } }); }); - function expectPair(args: string[] | undefined, pair: [string, string]) { if (!args) { throw new Error("Args is undefined");