Introduce --oss flag to use gpt-oss models (#1848)

This adds support for easily running Codex backed by a local Ollama
instance serving our new open-source models. See
https://github.com/openai/gpt-oss for details.

If you pass `--oss`, you'll be prompted to install/launch Ollama, and
Codex will automatically download the 20b model and attempt to use it.

We'll likely want to expand this with some options later to make the
experience smoother for users who can't run the 20b or want to run the
120b.
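
As a quick illustration of what the new provider plumbing exposes, here is a minimal sketch that inspects the built-in "oss" provider the flag is assumed to select. It relies on the re-exports added in this commit (`built_in_model_providers`, `BUILT_IN_OSS_MODEL_PROVIDER_ID`); the `codex_core` crate path and public field access are assumptions, not something these hunks show.

use codex_core::BUILT_IN_OSS_MODEL_PROVIDER_ID;
use codex_core::built_in_model_providers;

fn main() {
    // Look up the built-in "oss" provider (assumed to be what `--oss` selects).
    let providers = built_in_model_providers();
    let oss = &providers[BUILT_IN_OSS_MODEL_PROVIDER_ID];

    // It points at a local Ollama-compatible endpoint and needs no API key.
    println!("name:          {}", oss.name);
    println!("base_url:      {:?}", oss.base_url);
    println!("requires_auth: {}", oss.requires_auth);
}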

Co-authored-by: Michael Bolin <mbolin@openai.com>
Authored by easong-openai on 2025-08-05 11:31:11 -07:00, committed by GitHub
parent e0303dbac0
commit 9285350842
21 changed files with 924 additions and 44 deletions

@@ -385,6 +385,8 @@ pub struct ConfigOverrides {
pub codex_linux_sandbox_exe: Option<PathBuf>,
pub base_instructions: Option<String>,
pub include_plan_tool: Option<bool>,
pub default_disable_response_storage: Option<bool>,
pub default_show_raw_agent_reasoning: Option<bool>,
}
impl Config {
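
The two new `default_*` fields let a caller seed a default without clobbering anything the user set explicitly. Below is a hypothetical sketch of how the `--oss` code path might populate them; it assumes `ConfigOverrides` derives `Default` and is reachable as `codex_core::config::ConfigOverrides`, neither of which is shown in these hunks, and the chosen values are illustrative only.

use codex_core::config::ConfigOverrides;

fn oss_overrides() -> ConfigOverrides {
    ConfigOverrides {
        // Hypothetical defaults for a local gpt-oss run: no hosted response
        // storage, and raw reasoning shown for easier debugging.
        default_disable_response_storage: Some(true),
        default_show_raw_agent_reasoning: Some(true),
        // Leave every other override unset.
        ..Default::default()
    }
}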
@@ -408,6 +410,8 @@ impl Config {
codex_linux_sandbox_exe,
base_instructions,
include_plan_tool,
default_disable_response_storage,
default_show_raw_agent_reasoning,
} = overrides;
let config_profile = match config_profile_key.as_ref().or(cfg.profile.as_ref()) {
@@ -525,6 +529,7 @@ impl Config {
disable_response_storage: config_profile
.disable_response_storage
.or(cfg.disable_response_storage)
.or(default_disable_response_storage)
.unwrap_or(false),
notify: cfg.notify,
user_instructions,
@@ -539,7 +544,10 @@ impl Config {
codex_linux_sandbox_exe,
hide_agent_reasoning: cfg.hide_agent_reasoning.unwrap_or(false),
show_raw_agent_reasoning: cfg
    .show_raw_agent_reasoning
    .or(default_show_raw_agent_reasoning)
    .unwrap_or(false),
model_reasoning_effort: config_profile
.model_reasoning_effort
.or(cfg.model_reasoning_effort)
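
Both overrides resolve with the same `Option::or` precedence: an explicit profile value wins, then the `config.toml` value, then the default injected by the CLI (for example by `--oss`), and finally a hard-coded `false`. (`show_raw_agent_reasoning` simply skips the profile layer.) A standalone restatement using only `std`:

// Mirrors `profile.or(cfg).or(cli_default).unwrap_or(false)` from above.
fn resolve(profile: Option<bool>, cfg: Option<bool>, cli_default: Option<bool>) -> bool {
    profile.or(cfg).or(cli_default).unwrap_or(false)
}

fn main() {
    assert!(!resolve(None, None, None)); // nothing set: falls back to false
    assert!(resolve(None, None, Some(true))); // CLI-provided default applies
    assert!(!resolve(None, Some(false), Some(true))); // config.toml beats the CLI default
    assert!(resolve(Some(true), Some(false), None)); // profile wins over everything
}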

@@ -28,6 +28,7 @@ mod mcp_connection_manager;
mod mcp_tool_call;
mod message_history;
mod model_provider_info;
pub use model_provider_info::BUILT_IN_OSS_MODEL_PROVIDER_ID;
pub use model_provider_info::ModelProviderInfo;
pub use model_provider_info::WireApi;
pub use model_provider_info::built_in_model_providers;

@@ -85,6 +85,8 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
)
} else if slug.starts_with("gpt-4o") {
simple_model_family!(slug, "gpt-4o")
} else if slug.starts_with("gpt-oss") {
simple_model_family!(slug, "gpt-oss")
} else if slug.starts_with("gpt-3.5") {
simple_model_family!(slug, "gpt-3.5")
} else {
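
The family lookup is a first-match prefix test on the model slug, so Ollama-style tags such as `gpt-oss:20b` resolve to the new `gpt-oss` family. A standalone sketch of that dispatch follows; a plain `&str` stands in for the real `ModelFamily`, and the final `else` arm (not shown in the hunk) is assumed to yield `None` here.

fn family_slug(slug: &str) -> Option<&'static str> {
    if slug.starts_with("gpt-4o") {
        Some("gpt-4o")
    } else if slug.starts_with("gpt-oss") {
        Some("gpt-oss")
    } else if slug.starts_with("gpt-3.5") {
        Some("gpt-3.5")
    } else {
        // The real fallback behaviour is outside this hunk.
        None
    }
}

fn main() {
    assert_eq!(family_slug("gpt-oss:20b"), Some("gpt-oss"));
    assert_eq!(family_slug("gpt-4o-mini"), Some("gpt-4o"));
    assert_eq!(family_slug("some-local-model"), None);
}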

@@ -226,53 +226,93 @@ impl ModelProviderInfo {
    }
}

const DEFAULT_OLLAMA_PORT: u32 = 11434;

pub const BUILT_IN_OSS_MODEL_PROVIDER_ID: &str = "oss";

/// Built-in default provider list.
pub fn built_in_model_providers() -> HashMap<String, ModelProviderInfo> {
    use ModelProviderInfo as P;

    // These CODEX_OSS_ environment variables are experimental: we may
    // switch to reading values from config.toml instead.
    let codex_oss_base_url = match std::env::var("CODEX_OSS_BASE_URL")
        .ok()
        .filter(|v| !v.trim().is_empty())
    {
        Some(url) => url,
        None => format!(
            "http://localhost:{port}/v1",
            port = std::env::var("CODEX_OSS_PORT")
                .ok()
                .filter(|v| !v.trim().is_empty())
                .and_then(|v| v.parse::<u32>().ok())
                .unwrap_or(DEFAULT_OLLAMA_PORT)
        ),
    };

    // We do not want to be in the business of adjucating which third-party
    // providers are bundled with Codex CLI, so we only include the OpenAI and
    // open source ("oss") providers by default. Users are encouraged to add to
    // `model_providers` in config.toml to add their own providers.
    [
        (
            "openai",
            P {
                name: "OpenAI".into(),
                // Allow users to override the default OpenAI endpoint by
                // exporting `OPENAI_BASE_URL`. This is useful when pointing
                // Codex at a proxy, mock server, or Azure-style deployment
                // without requiring a full TOML override for the built-in
                // OpenAI provider.
                base_url: std::env::var("OPENAI_BASE_URL")
                    .ok()
                    .filter(|v| !v.trim().is_empty()),
                env_key: None,
                env_key_instructions: None,
                wire_api: WireApi::Responses,
                query_params: None,
                http_headers: Some(
                    [("version".to_string(), env!("CARGO_PKG_VERSION").to_string())]
                        .into_iter()
                        .collect(),
                ),
                env_http_headers: Some(
                    [
                        (
                            "OpenAI-Organization".to_string(),
                            "OPENAI_ORGANIZATION".to_string(),
                        ),
                        ("OpenAI-Project".to_string(), "OPENAI_PROJECT".to_string()),
                    ]
                    .into_iter()
                    .collect(),
                ),
                // Use global defaults for retry/timeout unless overridden in config.toml.
                request_max_retries: None,
                stream_max_retries: None,
                stream_idle_timeout_ms: None,
                requires_auth: true,
            },
        ),
        (
            BUILT_IN_OSS_MODEL_PROVIDER_ID,
            P {
                name: "Open Source".into(),
                base_url: Some(codex_oss_base_url),
                env_key: None,
                env_key_instructions: None,
                wire_api: WireApi::Chat,
                query_params: None,
                http_headers: None,
                env_http_headers: None,
                request_max_retries: None,
                stream_max_retries: None,
                stream_idle_timeout_ms: None,
                requires_auth: false,
            },
        ),
    ]
    .into_iter()
    .map(|(k, v)| (k.to_string(), v))
    .collect()
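
The endpoint resolution above boils down to: `CODEX_OSS_BASE_URL` wins outright when set; otherwise `CODEX_OSS_PORT` (falling back to Ollama's default 11434) is spliced into a localhost URL. A standalone restatement with the environment reads factored out into parameters so the fallbacks are easy to exercise:

const DEFAULT_OLLAMA_PORT: u32 = 11434;

fn oss_base_url(base_url: Option<&str>, port: Option<&str>) -> String {
    match base_url.filter(|v| !v.trim().is_empty()) {
        Some(url) => url.to_string(),
        None => format!(
            "http://localhost:{port}/v1",
            port = port
                .filter(|v| !v.trim().is_empty())
                .and_then(|v| v.parse::<u32>().ok())
                .unwrap_or(DEFAULT_OLLAMA_PORT)
        ),
    }
}

fn main() {
    assert_eq!(oss_base_url(None, None), "http://localhost:11434/v1");
    assert_eq!(oss_base_url(None, Some("8080")), "http://localhost:8080/v1");
    assert_eq!(
        oss_base_url(Some("http://my-host:1234/v1"), None),
        "http://my-host:1234/v1"
    );
}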