Introduce --oss flag to use gpt-oss models (#1848)

This adds support for easily running Codex backed by a local Ollama
instance serving our new open-source models. See
https://github.com/openai/gpt-oss for details.

If you pass `--oss`, you'll be prompted to install/launch Ollama, and
Codex will automatically download the 20b model and attempt to use it.

We'll likely want to expand this with some options later to make the
experience smoother for users who can't run the 20b or want to run the
120b.
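
As a quick illustration of what the new provider plumbing exposes, here is a minimal sketch that inspects the built-in "oss" provider the flag is assumed to select. It relies on the re-exports added in this commit (`built_in_model_providers`, `BUILT_IN_OSS_MODEL_PROVIDER_ID`); the `codex_core` crate path and public field access are assumptions, not something these hunks show.

use codex_core::BUILT_IN_OSS_MODEL_PROVIDER_ID;
use codex_core::built_in_model_providers;

fn main() {
    // Look up the built-in "oss" provider (assumed to be what `--oss` selects).
    let providers = built_in_model_providers();
    let oss = &providers[BUILT_IN_OSS_MODEL_PROVIDER_ID];

    // It points at a local Ollama-compatible endpoint and needs no API key.
    println!("name:          {}", oss.name);
    println!("base_url:      {:?}", oss.base_url);
    println!("requires_auth: {}", oss.requires_auth);
}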

Co-authored-by: Michael Bolin <mbolin@openai.com>
Authored by easong-openai on 2025-08-05 11:31:11 -07:00, committed by GitHub
parent e0303dbac0
commit 9285350842
21 changed files with 924 additions and 44 deletions

@@ -385,6 +385,8 @@ pub struct ConfigOverrides {
pub codex_linux_sandbox_exe: Option<PathBuf>,
pub base_instructions: Option<String>,
pub include_plan_tool: Option<bool>,
pub default_disable_response_storage: Option<bool>,
pub default_show_raw_agent_reasoning: Option<bool>,
}
impl Config {
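
The two new `default_*` fields let a caller seed a default without clobbering anything the user set explicitly. Below is a hypothetical sketch of how the `--oss` code path might populate them; it assumes `ConfigOverrides` derives `Default` and is reachable as `codex_core::config::ConfigOverrides`, neither of which is shown in these hunks, and the chosen values are illustrative only.

use codex_core::config::ConfigOverrides;

fn oss_overrides() -> ConfigOverrides {
    ConfigOverrides {
        // Hypothetical defaults for a local gpt-oss run: no hosted response
        // storage, and raw reasoning shown for easier debugging.
        default_disable_response_storage: Some(true),
        default_show_raw_agent_reasoning: Some(true),
        // Leave every other override unset.
        ..Default::default()
    }
}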
@@ -408,6 +410,8 @@ impl Config {
codex_linux_sandbox_exe,
base_instructions,
include_plan_tool,
default_disable_response_storage,
default_show_raw_agent_reasoning,
} = overrides;
let config_profile = match config_profile_key.as_ref().or(cfg.profile.as_ref()) {
@@ -525,6 +529,7 @@ impl Config {
disable_response_storage: config_profile
.disable_response_storage
.or(cfg.disable_response_storage)
.or(default_disable_response_storage)
.unwrap_or(false),
notify: cfg.notify,
user_instructions,
@@ -539,7 +544,10 @@ impl Config {
codex_linux_sandbox_exe,
hide_agent_reasoning: cfg.hide_agent_reasoning.unwrap_or(false),
show_raw_agent_reasoning: cfg
    .show_raw_agent_reasoning
    .or(default_show_raw_agent_reasoning)
    .unwrap_or(false),
model_reasoning_effort: config_profile
.model_reasoning_effort
.or(cfg.model_reasoning_effort)
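
Both overrides resolve with the same `Option::or` precedence: an explicit profile value wins, then the `config.toml` value, then the default injected by the CLI (for example by `--oss`), and finally a hard-coded `false`. (`show_raw_agent_reasoning` simply skips the profile layer.) A standalone restatement using only `std`:

// Mirrors `profile.or(cfg).or(cli_default).unwrap_or(false)` from above.
fn resolve(profile: Option<bool>, cfg: Option<bool>, cli_default: Option<bool>) -> bool {
    profile.or(cfg).or(cli_default).unwrap_or(false)
}

fn main() {
    assert!(!resolve(None, None, None)); // nothing set: falls back to false
    assert!(resolve(None, None, Some(true))); // CLI-provided default applies
    assert!(!resolve(None, Some(false), Some(true))); // config.toml beats the CLI default
    assert!(resolve(Some(true), Some(false), None)); // profile wins over everything
}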

@@ -28,6 +28,7 @@ mod mcp_connection_manager;
mod mcp_tool_call;
mod message_history;
mod model_provider_info;
pub use model_provider_info::BUILT_IN_OSS_MODEL_PROVIDER_ID;
pub use model_provider_info::ModelProviderInfo;
pub use model_provider_info::WireApi;
pub use model_provider_info::built_in_model_providers;

@@ -85,6 +85,8 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
)
} else if slug.starts_with("gpt-4o") {
simple_model_family!(slug, "gpt-4o")
} else if slug.starts_with("gpt-oss") {
simple_model_family!(slug, "gpt-oss")
} else if slug.starts_with("gpt-3.5") {
simple_model_family!(slug, "gpt-3.5")
} else {
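
The family lookup is a first-match prefix test on the model slug, so Ollama-style tags such as `gpt-oss:20b` resolve to the new `gpt-oss` family. A standalone sketch of that dispatch follows; a plain `&str` stands in for the real `ModelFamily`, and the final `else` arm (not shown in the hunk) is assumed to yield `None` here.

fn family_slug(slug: &str) -> Option<&'static str> {
    if slug.starts_with("gpt-4o") {
        Some("gpt-4o")
    } else if slug.starts_with("gpt-oss") {
        Some("gpt-oss")
    } else if slug.starts_with("gpt-3.5") {
        Some("gpt-3.5")
    } else {
        // The real fallback behaviour is outside this hunk.
        None
    }
}

fn main() {
    assert_eq!(family_slug("gpt-oss:20b"), Some("gpt-oss"));
    assert_eq!(family_slug("gpt-4o-mini"), Some("gpt-4o"));
    assert_eq!(family_slug("some-local-model"), None);
}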

@@ -226,53 +226,93 @@ impl ModelProviderInfo {
    }
}

const DEFAULT_OLLAMA_PORT: u32 = 11434;

pub const BUILT_IN_OSS_MODEL_PROVIDER_ID: &str = "oss";

/// Built-in default provider list.
pub fn built_in_model_providers() -> HashMap<String, ModelProviderInfo> {
    use ModelProviderInfo as P;

    // These CODEX_OSS_ environment variables are experimental: we may
    // switch to reading values from config.toml instead.
    let codex_oss_base_url = match std::env::var("CODEX_OSS_BASE_URL")
        .ok()
        .filter(|v| !v.trim().is_empty())
    {
        Some(url) => url,
        None => format!(
            "http://localhost:{port}/v1",
            port = std::env::var("CODEX_OSS_PORT")
                .ok()
                .filter(|v| !v.trim().is_empty())
                .and_then(|v| v.parse::<u32>().ok())
                .unwrap_or(DEFAULT_OLLAMA_PORT)
        ),
    };

    // We do not want to be in the business of adjucating which third-party
    // providers are bundled with Codex CLI, so we only include the OpenAI and
    // open source ("oss") providers by default. Users are encouraged to add to
    // `model_providers` in config.toml to add their own providers.
    [
        (
            "openai",
            P {
                name: "OpenAI".into(),
                // Allow users to override the default OpenAI endpoint by
                // exporting `OPENAI_BASE_URL`. This is useful when pointing
                // Codex at a proxy, mock server, or Azure-style deployment
                // without requiring a full TOML override for the built-in
                // OpenAI provider.
                base_url: std::env::var("OPENAI_BASE_URL")
                    .ok()
                    .filter(|v| !v.trim().is_empty()),
                env_key: None,
                env_key_instructions: None,
                wire_api: WireApi::Responses,
                query_params: None,
                http_headers: Some(
                    [("version".to_string(), env!("CARGO_PKG_VERSION").to_string())]
                        .into_iter()
                        .collect(),
                ),
                env_http_headers: Some(
                    [
                        (
                            "OpenAI-Organization".to_string(),
                            "OPENAI_ORGANIZATION".to_string(),
                        ),
                        ("OpenAI-Project".to_string(), "OPENAI_PROJECT".to_string()),
                    ]
                    .into_iter()
                    .collect(),
                ),
                // Use global defaults for retry/timeout unless overridden in config.toml.
                request_max_retries: None,
                stream_max_retries: None,
                stream_idle_timeout_ms: None,
                requires_auth: true,
            },
        ),
        (
            BUILT_IN_OSS_MODEL_PROVIDER_ID,
            P {
                name: "Open Source".into(),
                base_url: Some(codex_oss_base_url),
                env_key: None,
                env_key_instructions: None,
                wire_api: WireApi::Chat,
                query_params: None,
                http_headers: None,
                env_http_headers: None,
                request_max_retries: None,
                stream_max_retries: None,
                stream_idle_timeout_ms: None,
                requires_auth: false,
            },
        ),
    ]
    .into_iter()
    .map(|(k, v)| (k.to_string(), v))
    .collect()
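
The endpoint resolution above boils down to: `CODEX_OSS_BASE_URL` wins outright when set; otherwise `CODEX_OSS_PORT` (falling back to Ollama's default 11434) is spliced into a localhost URL. A standalone restatement with the environment reads factored out into parameters so the fallbacks are easy to exercise:

const DEFAULT_OLLAMA_PORT: u32 = 11434;

fn oss_base_url(base_url: Option<&str>, port: Option<&str>) -> String {
    match base_url.filter(|v| !v.trim().is_empty()) {
        Some(url) => url.to_string(),
        None => format!(
            "http://localhost:{port}/v1",
            port = port
                .filter(|v| !v.trim().is_empty())
                .and_then(|v| v.parse::<u32>().ok())
                .unwrap_or(DEFAULT_OLLAMA_PORT)
        ),
    }
}

fn main() {
    assert_eq!(oss_base_url(None, None), "http://localhost:11434/v1");
    assert_eq!(oss_base_url(None, Some("8080")), "http://localhost:8080/v1");
    assert_eq!(
        oss_base_url(Some("http://my-host:1234/v1"), None),
        "http://my-host:1234/v1"
    );
}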