Add exec output-schema parameter (#4079)

Adds structured output to `exec` via the `--output-schema` parameter.
2025-09-23 13:59:16 -07:00
parent 0f9a796617
commit fdb8dadcae
15 changed files with 341 additions and 7 deletions
--- a/codex-rs/core/src/chat_completions.rs
+++ b/codex-rs/core/src/chat_completions.rs
@@ -35,6 +35,12 @@ pub(crate) async fn stream_chat_completions(
    client: &reqwest::Client,
    provider: &ModelProviderInfo,
 ) -> Result<ResponseStream> {
+    if prompt.output_schema.is_some() {
+        return Err(CodexErr::UnsupportedOperation(
+            "output_schema is not supported for Chat Completions API".to_string(),
+        ));
+    }
+
    // Build messages array
    let mut messages = Vec::<serde_json::Value>::new();

--- a/codex-rs/core/src/client.rs
+++ b/codex-rs/core/src/client.rs
@@ -185,7 +185,7 @@ impl ModelClient {

        // Only include `text.verbosity` for GPT-5 family models
        let text = if self.config.model_family.family == "gpt-5" {
-            create_text_param_for_request(self.config.model_verbosity)
+            create_text_param_for_request(self.config.model_verbosity, &prompt.output_schema)
        } else {
            if self.config.model_verbosity.is_some() {
                warn!(
--- a/codex-rs/core/src/client_common.rs
+++ b/codex-rs/core/src/client_common.rs
@@ -10,6 +10,7 @@ use codex_protocol::config_types::Verbosity as VerbosityConfig;
 use codex_protocol::models::ResponseItem;
 use futures::Stream;
 use serde::Serialize;
+use serde_json::Value;
 use std::borrow::Cow;
 use std::ops::Deref;
 use std::pin::Pin;
@@ -32,6 +33,9 @@ pub struct Prompt {

    /// Optional override for the built-in BASE_INSTRUCTIONS.
    pub base_instructions_override: Option<String>,
+
+    /// Optional JSON schema for the model's response.
+    pub output_schema: Option<Value>,
 }

 impl Prompt {
@@ -90,14 +94,31 @@ pub(crate) struct Reasoning {
    pub(crate) summary: Option<ReasoningSummaryConfig>,
 }

+#[derive(Debug, Serialize, Default, Clone)]
+#[serde(rename_all = "snake_case")]
+pub(crate) enum TextFormatType {
+    #[default]
+    JsonSchema,
+}
+
+#[derive(Debug, Serialize, Default, Clone)]
+pub(crate) struct TextFormat {
+    pub(crate) r#type: TextFormatType,
+    pub(crate) strict: bool,
+    pub(crate) schema: Value,
+    pub(crate) name: String,
+}
+
 /// Controls under the `text` field in the Responses API for GPT-5.
-#[derive(Debug, Serialize, Default, Clone, Copy)]
+#[derive(Debug, Serialize, Default, Clone)]
 pub(crate) struct TextControls {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub(crate) verbosity: Option<OpenAiVerbosity>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub(crate) format: Option<TextFormat>,
 }

-#[derive(Debug, Serialize, Default, Clone, Copy)]
+#[derive(Debug, Serialize, Default, Clone)]
 #[serde(rename_all = "lowercase")]
 pub(crate) enum OpenAiVerbosity {
    Low,
@@ -156,9 +177,20 @@ pub(crate) fn create_reasoning_param_for_request(

 pub(crate) fn create_text_param_for_request(
    verbosity: Option<VerbosityConfig>,
+    output_schema: &Option<Value>,
 ) -> Option<TextControls> {
-    verbosity.map(|v| TextControls {
-        verbosity: Some(v.into()),
+    if verbosity.is_none() && output_schema.is_none() {
+        return None;
+    }
+
+    Some(TextControls {
+        verbosity: verbosity.map(std::convert::Into::into),
+        format: output_schema.as_ref().map(|schema| TextFormat {
+            r#type: TextFormatType::JsonSchema,
+            strict: true,
+            schema: schema.clone(),
+            name: "codex_output_schema".to_string(),
+        }),
    })
 }

@@ -255,6 +287,7 @@ mod tests {
            prompt_cache_key: None,
            text: Some(TextControls {
                verbosity: Some(OpenAiVerbosity::Low),
+                format: None,
            }),
        };

@@ -267,6 +300,52 @@ mod tests {
        );
    }

+    #[test]
+    fn serializes_text_schema_with_strict_format() {
+        let input: Vec<ResponseItem> = vec![];
+        let tools: Vec<serde_json::Value> = vec![];
+        let schema = serde_json::json!({
+            "type": "object",
+            "properties": {
+                "answer": {"type": "string"}
+            },
+            "required": ["answer"],
+        });
+        let text_controls =
+            create_text_param_for_request(None, &Some(schema.clone())).expect("text controls");
+
+        let req = ResponsesApiRequest {
+            model: "gpt-5",
+            instructions: "i",
+            input: &input,
+            tools: &tools,
+            tool_choice: "auto",
+            parallel_tool_calls: false,
+            reasoning: None,
+            store: false,
+            stream: true,
+            include: vec![],
+            prompt_cache_key: None,
+            text: Some(text_controls),
+        };
+
+        let v = serde_json::to_value(&req).expect("json");
+        let text = v.get("text").expect("text field");
+        assert!(text.get("verbosity").is_none());
+        let format = text.get("format").expect("format field");
+
+        assert_eq!(
+            format.get("name"),
+            Some(&serde_json::Value::String("codex_output_schema".into()))
+        );
+        assert_eq!(
+            format.get("type"),
+            Some(&serde_json::Value::String("json_schema".into()))
+        );
+        assert_eq!(format.get("strict"), Some(&serde_json::Value::Bool(true)));
+        assert_eq!(format.get("schema"), Some(&schema));
+    }
+
    #[test]
    fn omits_text_when_not_set() {
        let input: Vec<ResponseItem> = vec![];
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -31,6 +31,7 @@ use mcp_types::CallToolResult;
 use serde::Deserialize;
 use serde::Serialize;
 use serde_json;
+use serde_json::Value;
 use tokio::sync::Mutex;
 use tokio::sync::oneshot;
 use tokio::task::AbortHandle;
@@ -302,6 +303,7 @@ pub(crate) struct TurnContext {
    pub(crate) shell_environment_policy: ShellEnvironmentPolicy,
    pub(crate) tools_config: ToolsConfig,
    pub(crate) is_review_mode: bool,
+    pub(crate) final_output_json_schema: Option<Value>,
 }

 impl TurnContext {
@@ -469,6 +471,7 @@ impl Session {
            shell_environment_policy: config.shell_environment_policy.clone(),
            cwd,
            is_review_mode: false,
+            final_output_json_schema: None,
        };
        let sess = Arc::new(Session {
            conversation_id,
@@ -1237,6 +1240,7 @@ async fn submission_loop(
                    shell_environment_policy: prev.shell_environment_policy.clone(),
                    cwd: new_cwd.clone(),
                    is_review_mode: false,
+                    final_output_json_schema: None,
                };

                // Install the new persistent context for subsequent tasks/turns.
@@ -1271,6 +1275,7 @@ async fn submission_loop(
                model,
                effort,
                summary,
+                final_output_json_schema,
            } => {
                // attempt to inject input into current task
                if let Err(items) = sess.inject_input(items).await {
@@ -1321,6 +1326,7 @@ async fn submission_loop(
                        shell_environment_policy: turn_context.shell_environment_policy.clone(),
                        cwd,
                        is_review_mode: false,
+                        final_output_json_schema,
                    };

                    // if the environment context has changed, record it in the conversation history
@@ -1575,6 +1581,7 @@ async fn spawn_review_thread(
        shell_environment_policy: parent_turn_context.shell_environment_policy.clone(),
        cwd: parent_turn_context.cwd.clone(),
        is_review_mode: true,
+        final_output_json_schema: None,
    };

    // Seed the child task with the review prompt as the initial user message.
@@ -1941,6 +1948,7 @@ async fn run_turn(
        input,
        tools,
        base_instructions_override: turn_context.base_instructions.clone(),
+        output_schema: turn_context.final_output_json_schema.clone(),
    };

    let mut retries = 0;
@@ -3604,6 +3612,7 @@ mod tests {
            shell_environment_policy: config.shell_environment_policy.clone(),
            tools_config,
            is_review_mode: false,
+            final_output_json_schema: None,
        };
        let session = Session {
            conversation_id,
--- a/codex-rs/core/src/codex/compact.rs
+++ b/codex-rs/core/src/codex/compact.rs
@@ -106,6 +106,7 @@ async fn run_compact_task_inner(
        input: turn_input,
        tools: Vec::new(),
        base_instructions_override: instructions_override,
+        output_schema: None,
    };

    let max_retries = turn_context.client.get_provider().stream_max_retries();
--- a/codex-rs/core/src/error.rs
+++ b/codex-rs/core/src/error.rs
@@ -104,6 +104,9 @@ pub enum CodexErr {
    #[error("codex-linux-sandbox was required but not provided")]
    LandlockSandboxExecutableNotProvided,

+    #[error("unsupported operation: {0}")]
+    UnsupportedOperation(String),
+
    // -----------------------------------------------------------------
    // Automatic conversions for common external error types
    // -----------------------------------------------------------------