From fdb8dadcae9f8eec91bc3eb5a17b3f9b19e28505 Mon Sep 17 00:00:00 2001 From: pakrym-oai Date: Tue, 23 Sep 2025 13:59:16 -0700 Subject: [PATCH] Add exec output-schema parameter (#4079) Adds structured output to `exec` via the `--output-schema` parameter. --- codex-rs/core/src/chat_completions.rs | 6 ++ codex-rs/core/src/client.rs | 2 +- codex-rs/core/src/client_common.rs | 87 ++++++++++++++++- codex-rs/core/src/codex.rs | 9 ++ codex-rs/core/src/codex/compact.rs | 1 + codex-rs/core/src/error.rs | 3 + codex-rs/core/tests/suite/json_result.rs | 97 +++++++++++++++++++ codex-rs/core/tests/suite/mod.rs | 1 + codex-rs/core/tests/suite/prompt_caching.rs | 5 + codex-rs/exec/src/cli.rs | 4 + codex-rs/exec/src/lib.rs | 52 +++++++++- codex-rs/exec/tests/suite/mod.rs | 1 + codex-rs/exec/tests/suite/output_schema.rs | 76 +++++++++++++++ .../mcp-server/src/codex_message_processor.rs | 1 + codex-rs/protocol/src/protocol.rs | 3 + 15 files changed, 341 insertions(+), 7 deletions(-) create mode 100644 codex-rs/core/tests/suite/json_result.rs create mode 100644 codex-rs/exec/tests/suite/output_schema.rs diff --git a/codex-rs/core/src/chat_completions.rs b/codex-rs/core/src/chat_completions.rs index a32b59b5..eddc7864 100644 --- a/codex-rs/core/src/chat_completions.rs +++ b/codex-rs/core/src/chat_completions.rs @@ -35,6 +35,12 @@ pub(crate) async fn stream_chat_completions( client: &reqwest::Client, provider: &ModelProviderInfo, ) -> Result { + if prompt.output_schema.is_some() { + return Err(CodexErr::UnsupportedOperation( + "output_schema is not supported for Chat Completions API".to_string(), + )); + } + // Build messages array let mut messages = Vec::::new(); diff --git a/codex-rs/core/src/client.rs b/codex-rs/core/src/client.rs index 72ca770a..f15983e8 100644 --- a/codex-rs/core/src/client.rs +++ b/codex-rs/core/src/client.rs @@ -185,7 +185,7 @@ impl ModelClient { // Only include `text.verbosity` for GPT-5 family models let text = if self.config.model_family.family == "gpt-5" { 
- create_text_param_for_request(self.config.model_verbosity) + create_text_param_for_request(self.config.model_verbosity, &prompt.output_schema) } else { if self.config.model_verbosity.is_some() { warn!( diff --git a/codex-rs/core/src/client_common.rs b/codex-rs/core/src/client_common.rs index 15bfb5d4..b8a6cf47 100644 --- a/codex-rs/core/src/client_common.rs +++ b/codex-rs/core/src/client_common.rs @@ -10,6 +10,7 @@ use codex_protocol::config_types::Verbosity as VerbosityConfig; use codex_protocol::models::ResponseItem; use futures::Stream; use serde::Serialize; +use serde_json::Value; use std::borrow::Cow; use std::ops::Deref; use std::pin::Pin; @@ -32,6 +33,9 @@ pub struct Prompt { /// Optional override for the built-in BASE_INSTRUCTIONS. pub base_instructions_override: Option, + + /// Optional output schema for the model's response. + pub output_schema: Option, } impl Prompt { @@ -90,14 +94,31 @@ pub(crate) struct Reasoning { pub(crate) summary: Option, } +#[derive(Debug, Serialize, Default, Clone)] +#[serde(rename_all = "snake_case")] +pub(crate) enum TextFormatType { + #[default] + JsonSchema, +} + +#[derive(Debug, Serialize, Default, Clone)] +pub(crate) struct TextFormat { + pub(crate) r#type: TextFormatType, + pub(crate) strict: bool, + pub(crate) schema: Value, + pub(crate) name: String, +} + /// Controls under the `text` field in the Responses API for GPT-5. 
-#[derive(Debug, Serialize, Default, Clone, Copy)] +#[derive(Debug, Serialize, Default, Clone)] pub(crate) struct TextControls { #[serde(skip_serializing_if = "Option::is_none")] pub(crate) verbosity: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) format: Option, } -#[derive(Debug, Serialize, Default, Clone, Copy)] +#[derive(Debug, Serialize, Default, Clone)] #[serde(rename_all = "lowercase")] pub(crate) enum OpenAiVerbosity { Low, @@ -156,9 +177,20 @@ pub(crate) fn create_reasoning_param_for_request( pub(crate) fn create_text_param_for_request( verbosity: Option, + output_schema: &Option, ) -> Option { - verbosity.map(|v| TextControls { - verbosity: Some(v.into()), + if verbosity.is_none() && output_schema.is_none() { + return None; + } + + Some(TextControls { + verbosity: verbosity.map(std::convert::Into::into), + format: output_schema.as_ref().map(|schema| TextFormat { + r#type: TextFormatType::JsonSchema, + strict: true, + schema: schema.clone(), + name: "codex_output_schema".to_string(), + }), }) } @@ -255,6 +287,7 @@ mod tests { prompt_cache_key: None, text: Some(TextControls { verbosity: Some(OpenAiVerbosity::Low), + format: None, }), }; @@ -267,6 +300,52 @@ mod tests { ); } + #[test] + fn serializes_text_schema_with_strict_format() { + let input: Vec = vec![]; + let tools: Vec = vec![]; + let schema = serde_json::json!({ + "type": "object", + "properties": { + "answer": {"type": "string"} + }, + "required": ["answer"], + }); + let text_controls = + create_text_param_for_request(None, &Some(schema.clone())).expect("text controls"); + + let req = ResponsesApiRequest { + model: "gpt-5", + instructions: "i", + input: &input, + tools: &tools, + tool_choice: "auto", + parallel_tool_calls: false, + reasoning: None, + store: false, + stream: true, + include: vec![], + prompt_cache_key: None, + text: Some(text_controls), + }; + + let v = serde_json::to_value(&req).expect("json"); + let text = v.get("text").expect("text field"); + 
assert!(text.get("verbosity").is_none()); + let format = text.get("format").expect("format field"); + + assert_eq!( + format.get("name"), + Some(&serde_json::Value::String("codex_output_schema".into())) + ); + assert_eq!( + format.get("type"), + Some(&serde_json::Value::String("json_schema".into())) + ); + assert_eq!(format.get("strict"), Some(&serde_json::Value::Bool(true))); + assert_eq!(format.get("schema"), Some(&schema)); + } + #[test] fn omits_text_when_not_set() { let input: Vec = vec![]; diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 49ab6940..c441b0ec 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -31,6 +31,7 @@ use mcp_types::CallToolResult; use serde::Deserialize; use serde::Serialize; use serde_json; +use serde_json::Value; use tokio::sync::Mutex; use tokio::sync::oneshot; use tokio::task::AbortHandle; @@ -302,6 +303,7 @@ pub(crate) struct TurnContext { pub(crate) shell_environment_policy: ShellEnvironmentPolicy, pub(crate) tools_config: ToolsConfig, pub(crate) is_review_mode: bool, + pub(crate) final_output_json_schema: Option, } impl TurnContext { @@ -469,6 +471,7 @@ impl Session { shell_environment_policy: config.shell_environment_policy.clone(), cwd, is_review_mode: false, + final_output_json_schema: None, }; let sess = Arc::new(Session { conversation_id, @@ -1237,6 +1240,7 @@ async fn submission_loop( shell_environment_policy: prev.shell_environment_policy.clone(), cwd: new_cwd.clone(), is_review_mode: false, + final_output_json_schema: None, }; // Install the new persistent context for subsequent tasks/turns. 
@@ -1271,6 +1275,7 @@ async fn submission_loop( model, effort, summary, + final_output_json_schema, } => { // attempt to inject input into current task if let Err(items) = sess.inject_input(items).await { @@ -1321,6 +1326,7 @@ async fn submission_loop( shell_environment_policy: turn_context.shell_environment_policy.clone(), cwd, is_review_mode: false, + final_output_json_schema, }; // if the environment context has changed, record it in the conversation history @@ -1575,6 +1581,7 @@ async fn spawn_review_thread( shell_environment_policy: parent_turn_context.shell_environment_policy.clone(), cwd: parent_turn_context.cwd.clone(), is_review_mode: true, + final_output_json_schema: None, }; // Seed the child task with the review prompt as the initial user message. @@ -1941,6 +1948,7 @@ async fn run_turn( input, tools, base_instructions_override: turn_context.base_instructions.clone(), + output_schema: turn_context.final_output_json_schema.clone(), }; let mut retries = 0; @@ -3604,6 +3612,7 @@ mod tests { shell_environment_policy: config.shell_environment_policy.clone(), tools_config, is_review_mode: false, + final_output_json_schema: None, }; let session = Session { conversation_id, diff --git a/codex-rs/core/src/codex/compact.rs b/codex-rs/core/src/codex/compact.rs index a60466b9..016f406f 100644 --- a/codex-rs/core/src/codex/compact.rs +++ b/codex-rs/core/src/codex/compact.rs @@ -106,6 +106,7 @@ async fn run_compact_task_inner( input: turn_input, tools: Vec::new(), base_instructions_override: instructions_override, + output_schema: None, }; let max_retries = turn_context.client.get_provider().stream_max_retries(); diff --git a/codex-rs/core/src/error.rs b/codex-rs/core/src/error.rs index 77b447e5..a5150abd 100644 --- a/codex-rs/core/src/error.rs +++ b/codex-rs/core/src/error.rs @@ -104,6 +104,9 @@ pub enum CodexErr { #[error("codex-linux-sandbox was required but not provided")] LandlockSandboxExecutableNotProvided, + #[error("unsupported operation: {0}")] + 
UnsupportedOperation(String), + // ----------------------------------------------------------------- // Automatic conversions for common external error types // ----------------------------------------------------------------- diff --git a/codex-rs/core/tests/suite/json_result.rs b/codex-rs/core/tests/suite/json_result.rs new file mode 100644 index 00000000..43d5cb4b --- /dev/null +++ b/codex-rs/core/tests/suite/json_result.rs @@ -0,0 +1,97 @@ +#![cfg(not(target_os = "windows"))] + +use codex_core::protocol::AskForApproval; +use codex_core::protocol::EventMsg; +use codex_core::protocol::InputItem; +use codex_core::protocol::Op; +use codex_core::protocol::SandboxPolicy; +use codex_protocol::config_types::ReasoningSummary; +use core_test_support::non_sandbox_test; +use core_test_support::responses; +use core_test_support::test_codex::TestCodex; +use core_test_support::test_codex::test_codex; +use core_test_support::wait_for_event; +use pretty_assertions::assert_eq; +use responses::ev_assistant_message; +use responses::ev_completed; +use responses::sse; +use responses::start_mock_server; + +const SCHEMA: &str = r#" +{ + "type": "object", + "properties": { + "explanation": { "type": "string" }, + "final_answer": { "type": "string" } + }, + "required": ["explanation", "final_answer"], + "additionalProperties": false +} +"#; + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn codex_returns_json_result() -> anyhow::Result<()> { + non_sandbox_test!(result); + + let server = start_mock_server().await; + + let sse1 = sse(vec![ + ev_assistant_message( + "m2", + r#"{"explanation": "explanation", "final_answer": "final_answer"}"#, + ), + ev_completed("r1"), + ]); + + let expected_schema: serde_json::Value = serde_json::from_str(SCHEMA)?; + let match_json_text_param = move |req: &wiremock::Request| { + let body: serde_json::Value = serde_json::from_slice(&req.body).unwrap_or_default(); + let Some(text) = body.get("text") else { + return false; + }; + let 
Some(format) = text.get("format") else { + return false; + }; + + format.get("name") == Some(&serde_json::Value::String("codex_output_schema".into())) + && format.get("type") == Some(&serde_json::Value::String("json_schema".into())) + && format.get("strict") == Some(&serde_json::Value::Bool(true)) + && format.get("schema") == Some(&expected_schema) + }; + responses::mount_sse_once(&server, match_json_text_param, sse1).await; + + let TestCodex { codex, cwd, .. } = test_codex().build(&server).await?; + + // 1) Normal user input – should hit server once. + codex + .submit(Op::UserTurn { + items: vec![InputItem::Text { + text: "hello world".into(), + }], + final_output_json_schema: Some(serde_json::from_str(SCHEMA)?), + cwd: cwd.path().to_path_buf(), + approval_policy: AskForApproval::Never, + sandbox_policy: SandboxPolicy::DangerFullAccess, + model: "gpt-5".to_string(), + effort: None, + summary: ReasoningSummary::Auto, + }) + .await?; + + let message = wait_for_event(&codex, |ev| matches!(ev, EventMsg::AgentMessage(_))).await; + if let EventMsg::AgentMessage(message) = message { + let json: serde_json::Value = serde_json::from_str(&message.message)?; + assert_eq!( + json.get("explanation"), + Some(&serde_json::Value::String("explanation".into())) + ); + assert_eq!( + json.get("final_answer"), + Some(&serde_json::Value::String("final_answer".into())) + ); + } else { + anyhow::bail!("expected agent message event"); + } + + Ok(()) +} diff --git a/codex-rs/core/tests/suite/mod.rs b/codex-rs/core/tests/suite/mod.rs index d4fb4460..2d91e330 100644 --- a/codex-rs/core/tests/suite/mod.rs +++ b/codex-rs/core/tests/suite/mod.rs @@ -7,6 +7,7 @@ mod compact_resume_fork; mod exec; mod exec_stream_events; mod fork_conversation; +mod json_result; mod live_cli; mod model_overrides; mod prompt_caching; diff --git a/codex-rs/core/tests/suite/prompt_caching.rs b/codex-rs/core/tests/suite/prompt_caching.rs index 6cfe6f4d..147cf27d 100644 --- a/codex-rs/core/tests/suite/prompt_caching.rs 
+++ b/codex-rs/core/tests/suite/prompt_caching.rs @@ -546,6 +546,7 @@ async fn per_turn_overrides_keep_cached_prefix_and_key_constant() { model: "o3".to_string(), effort: Some(ReasoningEffort::High), summary: ReasoningSummary::Detailed, + final_output_json_schema: None, }) .await .unwrap(); @@ -655,6 +656,7 @@ async fn send_user_turn_with_no_changes_does_not_send_environment_context() { model: default_model.clone(), effort: default_effort, summary: default_summary, + final_output_json_schema: None, }) .await .unwrap(); @@ -671,6 +673,7 @@ async fn send_user_turn_with_no_changes_does_not_send_environment_context() { model: default_model.clone(), effort: default_effort, summary: default_summary, + final_output_json_schema: None, }) .await .unwrap(); @@ -766,6 +769,7 @@ async fn send_user_turn_with_changes_sends_environment_context() { model: default_model, effort: default_effort, summary: default_summary, + final_output_json_schema: None, }) .await .unwrap(); @@ -782,6 +786,7 @@ async fn send_user_turn_with_changes_sends_environment_context() { model: "o3".to_string(), effort: Some(ReasoningEffort::High), summary: ReasoningSummary::Detailed, + final_output_json_schema: None, }) .await .unwrap(); diff --git a/codex-rs/exec/src/cli.rs b/codex-rs/exec/src/cli.rs index 19093ec9..5774fc18 100644 --- a/codex-rs/exec/src/cli.rs +++ b/codex-rs/exec/src/cli.rs @@ -52,6 +52,10 @@ pub struct Cli { #[arg(long = "skip-git-repo-check", default_value_t = false)] pub skip_git_repo_check: bool, + /// Path to a JSON Schema file describing the model's final response shape. 
+ #[arg(long = "output-schema", value_name = "FILE")] + pub output_schema: Option, + #[clap(skip)] pub config_overrides: CliConfigOverrides, diff --git a/codex-rs/exec/src/lib.rs b/codex-rs/exec/src/lib.rs index 097231da..ed8b26c1 100644 --- a/codex-rs/exec/src/lib.rs +++ b/codex-rs/exec/src/lib.rs @@ -25,6 +25,7 @@ use codex_ollama::DEFAULT_OSS_MODEL; use codex_protocol::config_types::SandboxMode; use event_processor_with_human_output::EventProcessorWithHumanOutput; use event_processor_with_json_output::EventProcessorWithJsonOutput; +use serde_json::Value; use tracing::debug; use tracing::error; use tracing::info; @@ -51,6 +52,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option) -> any json: json_mode, sandbox_mode: sandbox_mode_cli_arg, prompt, + output_schema: output_schema_path, config_overrides, } = cli; @@ -96,6 +98,8 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option) -> any } }; + let output_schema = load_output_schema(output_schema_path); + let (stdout_with_ansi, stderr_with_ansi) = match color { cli::Color::Always => (true, true), cli::Color::Never => (false, false), @@ -193,7 +197,14 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option) -> any // is using. event_processor.print_config_summary(&config, &prompt); - if !skip_git_repo_check && get_git_repo_root(&config.cwd.to_path_buf()).is_none() { + let default_cwd = config.cwd.to_path_buf(); + let default_approval_policy = config.approval_policy; + let default_sandbox_policy = config.sandbox_policy.clone(); + let default_model = config.model.clone(); + let default_effort = config.model_reasoning_effort; + let default_summary = config.model_reasoning_summary; + + if !skip_git_repo_check && get_git_repo_root(&default_cwd).is_none() { eprintln!("Not inside a trusted directory and --skip-git-repo-check was not specified."); std::process::exit(1); } @@ -288,7 +299,18 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option) -> any // Send the prompt. 
let items: Vec = vec![InputItem::Text { text: prompt }]; - let initial_prompt_task_id = conversation.submit(Op::UserInput { items }).await?; + let initial_prompt_task_id = conversation + .submit(Op::UserTurn { + items, + cwd: default_cwd, + approval_policy: default_approval_policy, + sandbox_policy: default_sandbox_policy, + model: default_model, + effort: default_effort, + summary: default_summary, + final_output_json_schema: output_schema, + }) + .await?; info!("Sent prompt with event ID: {initial_prompt_task_id}"); // Run the loop until the task is complete. @@ -327,3 +349,29 @@ async fn resolve_resume_path( Ok(None) } } + +fn load_output_schema(path: Option) -> Option { + let path = path?; + + let schema_str = match std::fs::read_to_string(&path) { + Ok(contents) => contents, + Err(err) => { + eprintln!( + "Failed to read output schema file {}: {err}", + path.display() + ); + std::process::exit(1); + } + }; + + match serde_json::from_str::(&schema_str) { + Ok(value) => Some(value), + Err(err) => { + eprintln!( + "Output schema file {} is not valid JSON: {err}", + path.display() + ); + std::process::exit(1); + } + } +} diff --git a/codex-rs/exec/tests/suite/mod.rs b/codex-rs/exec/tests/suite/mod.rs index 5748fba1..758f39ed 100644 --- a/codex-rs/exec/tests/suite/mod.rs +++ b/codex-rs/exec/tests/suite/mod.rs @@ -1,5 +1,6 @@ // Aggregates all former standalone integration tests as modules. 
mod apply_patch; mod common; +mod output_schema; mod resume; mod sandbox; diff --git a/codex-rs/exec/tests/suite/output_schema.rs b/codex-rs/exec/tests/suite/output_schema.rs new file mode 100644 index 00000000..954e1258 --- /dev/null +++ b/codex-rs/exec/tests/suite/output_schema.rs @@ -0,0 +1,76 @@ +#![cfg(not(target_os = "windows"))] +#![allow(clippy::expect_used, clippy::unwrap_used)] + +use assert_cmd::prelude::*; +use core_test_support::responses; +use serde_json::Value; +use std::process::Command; +use tempfile::TempDir; +use wiremock::matchers::any; + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn exec_includes_output_schema_in_request() -> anyhow::Result<()> { + let home = TempDir::new()?; + let workspace = TempDir::new()?; + + let schema_contents = serde_json::json!({ + "type": "object", + "properties": { + "answer": { "type": "string" } + }, + "required": ["answer"], + "additionalProperties": false + }); + let schema_path = workspace.path().join("schema.json"); + std::fs::write(&schema_path, serde_json::to_vec_pretty(&schema_contents)?)?; + let expected_schema: Value = schema_contents; + + let server = responses::start_mock_server().await; + let body = responses::sse(vec![ + serde_json::json!({ + "type": "response.created", + "response": {"id": "resp1"} + }), + responses::ev_assistant_message("m1", "fixture hello"), + responses::ev_completed("resp1"), + ]); + responses::mount_sse_once(&server, any(), body).await; + + Command::cargo_bin("codex-exec")? 
+ .current_dir(workspace.path()) + .env("CODEX_HOME", home.path()) + .env("OPENAI_API_KEY", "dummy") + .env("OPENAI_BASE_URL", format!("{}/v1", server.uri())) + .arg("--skip-git-repo-check") + .arg("-C") + .arg(workspace.path()) + .arg("--output-schema") + .arg(&schema_path) + .arg("-m") + .arg("gpt-5") + .arg("tell me a joke") + .assert() + .success(); + + let requests = server + .received_requests() + .await + .expect("failed to capture requests"); + assert_eq!(requests.len(), 1, "expected exactly one request"); + let payload: Value = serde_json::from_slice(&requests[0].body)?; + let text = payload.get("text").expect("request missing text field"); + let format = text + .get("format") + .expect("request missing text.format field"); + assert_eq!( + format, + &serde_json::json!({ + "name": "codex_output_schema", + "type": "json_schema", + "strict": true, + "schema": expected_schema, + }) + ); + + Ok(()) +} diff --git a/codex-rs/mcp-server/src/codex_message_processor.rs b/codex-rs/mcp-server/src/codex_message_processor.rs index 5e5012c0..eec3a62a 100644 --- a/codex-rs/mcp-server/src/codex_message_processor.rs +++ b/codex-rs/mcp-server/src/codex_message_processor.rs @@ -1009,6 +1009,7 @@ impl CodexMessageProcessor { model, effort, summary, + final_output_json_schema: None, }) .await; diff --git a/codex-rs/protocol/src/protocol.rs b/codex-rs/protocol/src/protocol.rs index c8744b63..c45a89d7 100644 --- a/codex-rs/protocol/src/protocol.rs +++ b/codex-rs/protocol/src/protocol.rs @@ -24,6 +24,7 @@ use mcp_types::CallToolResult; use mcp_types::Tool as McpTool; use serde::Deserialize; use serde::Serialize; +use serde_json::Value; use serde_with::serde_as; use strum_macros::Display; use ts_rs::TS; @@ -87,6 +88,8 @@ pub enum Op { /// Will only be honored if the model is configured to use reasoning. 
summary: ReasoningSummaryConfig, + // The JSON schema to use for the final assistant message + final_output_json_schema: Option, }, /// Override parts of the persistent turn context for subsequent turns.