llmx/codex-rs/core/src/config.rs

use crate::config_profile::ConfigProfile;
use crate::flags::OPENAI_DEFAULT_MODEL;
use crate::mcp_server_config::McpServerConfig;
use crate::model_provider_info::ModelProviderInfo;
use crate::model_provider_info::built_in_model_providers;
use crate::protocol::AskForApproval;
use crate::protocol::SandboxPermission;
use crate::protocol::SandboxPolicy;
use dirs::home_dir;
use serde::Deserialize;
use std::collections::HashMap;
use std::path::Path;
use std::path::PathBuf;

/// Maximum number of bytes of the documentation that will be embedded. Larger
/// files are *silently truncated* to this size so we do not take up too much of
/// the context window.
pub(crate) const PROJECT_DOC_MAX_BYTES: usize = 32 * 1024; // 32 KiB

/// Application configuration loaded from disk and merged with overrides.
#[derive(Debug, Clone, PartialEq)]
pub struct Config {
    /// Optional override of model selection.
    pub model: String,

    /// Key into the model_providers map that specifies which provider to use.
    pub model_provider_id: String,

    /// Info needed to make an API request to the model.
    pub model_provider: ModelProviderInfo,

    /// Approval policy for executing commands.
    pub approval_policy: AskForApproval,

    pub sandbox_policy: SandboxPolicy,

    /// Disable server-side response storage (sends the full conversation
    /// context with every request). Currently necessary for OpenAI customers
    /// who have opted into Zero Data Retention (ZDR).
    pub disable_response_storage: bool,

    /// User-provided instructions from instructions.md.
    pub instructions: Option<String>,

    /// Optional external notifier command. When set, Codex will spawn this
    /// program after each completed *turn* (i.e. when the agent finishes
    /// processing a user submission). The value must be the full command
    /// broken into argv tokens **without** the trailing JSON argument - Codex
    /// appends one extra argument containing a JSON payload describing the
    /// event.
    ///
    /// Example `~/.codex/config.toml` snippet:
    ///
    /// ```toml
    /// notify = ["notify-send", "Codex"]
    /// ```
    ///
    /// which will be invoked as:
    ///
    /// ```shell
    /// notify-send Codex '{"type":"agent-turn-complete","turn-id":"12345"}'
    /// ```
    ///
    /// If unset the feature is disabled.
    pub notify: Option<Vec<String>>,

    /// The directory that should be treated as the current working directory
    /// for the session. All relative paths inside the business-logic layer are
    /// resolved against this path.
    pub cwd: PathBuf,

    /// Definition for MCP servers that Codex can reach out to for tool calls.
    pub mcp_servers: HashMap<String, McpServerConfig>,

    /// Combined provider map (defaults merged with user-defined overrides).
    pub model_providers: HashMap<String, ModelProviderInfo>,

    /// Maximum number of bytes to include from an AGENTS.md project doc file.
    pub project_doc_max_bytes: usize,
}

/// Base config deserialized from ~/.codex/config.toml.
#[derive(Deserialize, Debug, Clone, Default)]
pub struct ConfigToml {
    /// Optional override of model selection.
    pub model: Option<String>,

    /// Provider to use from the model_providers map.
    pub model_provider: Option<String>,

    /// Default approval policy for executing commands.
    pub approval_policy: Option<AskForApproval>,

    // The `default` attribute ensures that the field is treated as `None` when
    // the key is omitted from the TOML. Without it, Serde treats the field as
    // required because we supply a custom deserializer.
    #[serde(default, deserialize_with = "deserialize_sandbox_permissions")]
    pub sandbox_permissions: Option<Vec<SandboxPermission>>,

    /// Disable server-side response storage (sends the full conversation
    /// context with every request). Currently necessary for OpenAI customers
    /// who have opted into Zero Data Retention (ZDR).
    pub disable_response_storage: Option<bool>,

    /// Optional external command to spawn for end-user notifications.
    #[serde(default)]
    pub notify: Option<Vec<String>>,

    /// System instructions.
    pub instructions: Option<String>,

    /// Definition for MCP servers that Codex can reach out to for tool calls.
    #[serde(default)]
    pub mcp_servers: HashMap<String, McpServerConfig>,

    /// User-defined provider entries that extend/override the built-in list.
    #[serde(default)]
    pub model_providers: HashMap<String, ModelProviderInfo>,

    /// Maximum number of bytes to include from an AGENTS.md project doc file.
    pub project_doc_max_bytes: Option<usize>,

    /// Profile to use from the `profiles` map.
    pub profile: Option<String>,

    /// Named profiles to facilitate switching between different configurations.
    #[serde(default)]
    pub profiles: HashMap<String, ConfigProfile>,
}

impl ConfigToml {
    /// Attempt to parse the file at `~/.codex/config.toml`. If it does not
    /// exist, return a default config. Though if it exists and cannot be
    /// parsed, report that to the user and force them to fix it.
    fn load_from_toml() -> std::io::Result<Self> {
        let config_toml_path = codex_dir()?.join("config.toml");
        match std::fs::read_to_string(&config_toml_path) {
            Ok(contents) => toml::from_str::<Self>(&contents).map_err(|e| {
                tracing::error!("Failed to parse config.toml: {e}");
                std::io::Error::new(std::io::ErrorKind::InvalidData, e)
            }),
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
                tracing::info!("config.toml not found, using defaults");
                Ok(Self::default())
            }
            Err(e) => {
                tracing::error!("Failed to read config.toml: {e}");
                Err(e)
            }
        }
    }
}

fn deserialize_sandbox_permissions<'de, D>(
    deserializer: D,
) -> Result<Option<Vec<SandboxPermission>>, D::Error>
where
    D: serde::Deserializer<'de>,
{
    let permissions: Option<Vec<String>> = Option::deserialize(deserializer)?;

    match permissions {
        Some(raw_permissions) => {
            let base_path = codex_dir().map_err(serde::de::Error::custom)?;

            let converted = raw_permissions
                .into_iter()
                .map(|raw| {
                    parse_sandbox_permission_with_base_path(&raw, base_path.clone())
                        .map_err(serde::de::Error::custom)
                })
                .collect::<Result<Vec<_>, D::Error>>()?;

            Ok(Some(converted))
        }
        None => Ok(None),
    }
}

/// Optional overrides for user configuration (e.g., from CLI flags).
#[derive(Default, Debug, Clone)]
pub struct ConfigOverrides {
    pub model: Option<String>,
    pub cwd: Option<PathBuf>,
    pub approval_policy: Option<AskForApproval>,
    pub sandbox_policy: Option<SandboxPolicy>,
    pub disable_response_storage: Option<bool>,
    pub model_provider: Option<String>,
    pub config_profile: Option<String>,
}

impl Config {
    /// Load configuration, optionally applying overrides (CLI flags). Merges
    /// ~/.codex/config.toml, ~/.codex/instructions.md, embedded defaults, and
    /// any values provided in `overrides` (highest precedence).
    pub fn load_with_overrides(overrides: ConfigOverrides) -> std::io::Result<Self> {
        let cfg: ConfigToml = ConfigToml::load_from_toml()?;
        tracing::warn!("Config parsed from config.toml: {cfg:?}");
        let codex_dir = codex_dir().ok();
        Self::load_from_base_config_with_overrides(cfg, overrides, codex_dir.as_deref())
    }

    fn load_from_base_config_with_overrides(
        cfg: ConfigToml,
        overrides: ConfigOverrides,
        codex_dir: Option<&Path>,
    ) -> std::io::Result<Self> {
        let instructions = Self::load_instructions(codex_dir);

        // Destructure ConfigOverrides fully to ensure all overrides are applied.
        let ConfigOverrides {
            model,
            cwd,
            approval_policy,
            sandbox_policy,
            disable_response_storage,
            model_provider,
            config_profile: config_profile_key,
        } = overrides;

        let config_profile = match config_profile_key.or(cfg.profile) {
            Some(key) => cfg
                .profiles
                .get(&key)
                .ok_or_else(|| {
                    std::io::Error::new(
                        std::io::ErrorKind::NotFound,
                        format!("config profile `{key}` not found"),
                    )
                })?
                .clone(),
            None => ConfigProfile::default(),
        };

        let sandbox_policy = match sandbox_policy {
            Some(sandbox_policy) => sandbox_policy,
            None => {
                // Derive a SandboxPolicy from the permissions in the config.
                match cfg.sandbox_permissions {
                    // Note this means the user can explicitly set permissions
                    // to the empty list in the config file, granting it no
                    // permissions whatsoever.
                    Some(permissions) => SandboxPolicy::from(permissions),
                    // Default to read only rather than completely locked down.
                    None => SandboxPolicy::new_read_only_policy(),
                }
            }
        };

        let mut model_providers = built_in_model_providers();
        // Merge user-defined providers into the built-in list.
        for (key, provider) in cfg.model_providers.into_iter() {
            model_providers.entry(key).or_insert(provider);
        }

        let model_provider_id = model_provider
            .or(config_profile.model_provider)
            .or(cfg.model_provider)
            .unwrap_or_else(|| "openai".to_string());
        let model_provider = model_providers
            .get(&model_provider_id)
            .ok_or_else(|| {
                std::io::Error::new(
                    std::io::ErrorKind::NotFound,
                    format!("Model provider `{model_provider_id}` not found"),
                )
            })?
            .clone();

        let resolved_cwd = {
            use std::env;

            match cwd {
                None => {
                    tracing::info!("cwd not set, using current dir");
                    env::current_dir()?
                }
                Some(p) if p.is_absolute() => p,
                Some(p) => {
                    // Resolve relative path against the current working directory.
                    tracing::info!("cwd is relative, resolving against current dir");
                    let mut current = env::current_dir()?;
                    current.push(p);
                    current
                }
            }
        };

        let config = Self {
            model: model
                .or(config_profile.model)
                .or(cfg.model)
                .unwrap_or_else(default_model),
            model_provider_id,
            model_provider,
            cwd: resolved_cwd,
            approval_policy: approval_policy
                .or(config_profile.approval_policy)
                .or(cfg.approval_policy)
                .unwrap_or_else(AskForApproval::default),
            sandbox_policy,
            disable_response_storage: disable_response_storage
                .or(config_profile.disable_response_storage)
                .or(cfg.disable_response_storage)
                .unwrap_or(false),
            notify: cfg.notify,
            instructions,
            mcp_servers: cfg.mcp_servers,
            model_providers,
            project_doc_max_bytes: cfg.project_doc_max_bytes.unwrap_or(PROJECT_DOC_MAX_BYTES),
        };
        Ok(config)
    }

    fn load_instructions(codex_dir: Option<&Path>) -> Option<String> {
        let mut p = match codex_dir {
            Some(p) => p.to_path_buf(),
            None => return None,
        };

        p.push("instructions.md");
        std::fs::read_to_string(&p).ok().and_then(|s| {
            let s = s.trim();
            if s.is_empty() {
                None
            } else {
                Some(s.to_string())
            }
        })
    }

    /// Meant to be used exclusively for tests: `load_with_overrides()` should
    /// be used in all other cases.
    pub fn load_default_config_for_test() -> Self {
        #[expect(clippy::expect_used)]
        Self::load_from_base_config_with_overrides(
            ConfigToml::default(),
            ConfigOverrides::default(),
            None,
        )
        .expect("defaults for test should always succeed")
    }
}

fn default_model() -> String {
    OPENAI_DEFAULT_MODEL.to_string()
}

/// Returns the path to the Codex configuration directory, which is `~/.codex`.
/// Does not verify that the directory exists.
pub fn codex_dir() -> std::io::Result<PathBuf> {
    let mut p = home_dir().ok_or_else(|| {
        std::io::Error::new(
            std::io::ErrorKind::NotFound,
            "Could not find home directory",
        )
    })?;
    p.push(".codex");
    Ok(p)
}

/// Returns the path to the folder where Codex logs are stored. Does not verify
/// that the directory exists.
pub fn log_dir() -> std::io::Result<PathBuf> {
    let mut p = codex_dir()?;
    p.push("log");
    Ok(p)
}

pub fn parse_sandbox_permission_with_base_path(
    raw: &str,
    base_path: PathBuf,
) -> std::io::Result<SandboxPermission> {
    use SandboxPermission::*;

    if let Some(path) = raw.strip_prefix("disk-write-folder=") {
        return if path.is_empty() {
            Err(std::io::Error::new(
                std::io::ErrorKind::InvalidInput,
                "--sandbox-permission disk-write-folder=<PATH> requires a non-empty PATH",
            ))
        } else {
            use path_absolutize::*;

            let file = PathBuf::from(path);
            let absolute_path = if file.is_relative() {
                file.absolutize_from(base_path)
            } else {
                file.absolutize()
            }
            .map(|path| path.into_owned())?;
            Ok(DiskWriteFolder {
                folder: absolute_path,
            })
        };
    }

    match raw {
        "disk-full-read-access" => Ok(DiskFullReadAccess),
        "disk-write-platform-user-temp-folder" => Ok(DiskWritePlatformUserTempFolder),
        "disk-write-platform-global-temp-folder" => Ok(DiskWritePlatformGlobalTempFolder),
        "disk-write-cwd" => Ok(DiskWriteCwd),
        "disk-full-write-access" => Ok(DiskFullWriteAccess),
        "network-full-access" => Ok(NetworkFullAccess),
        _ => Err(std::io::Error::new(
            std::io::ErrorKind::InvalidInput,
            format!(
                "`{raw}` is not a recognised permission.\nRun with `--help` to see the accepted values."
            ),
        )),
    }
}

#[cfg(test)]
mod tests {
    #![allow(clippy::expect_used, clippy::unwrap_used)]
    use super::*;
    use pretty_assertions::assert_eq;
    use tempfile::TempDir;

    /// Verify that the `sandbox_permissions` field on `ConfigToml` correctly
    /// differentiates between a value that is completely absent in the
    /// provided TOML (i.e. `None`) and one that is explicitly specified as an
    /// empty array (i.e. `Some(vec![])`). This ensures that downstream logic
    /// that treats these two cases differently (default read-only policy vs a
    /// fully locked-down sandbox) continues to function.
    #[test]
    fn test_sandbox_permissions_none_vs_empty_vec() {
        // Case 1: `sandbox_permissions` key is *absent* from the TOML source.
        let toml_source_without_key = "";
        let cfg_without_key: ConfigToml = toml::from_str(toml_source_without_key)
            .expect("TOML deserialization without key should succeed");
        assert!(cfg_without_key.sandbox_permissions.is_none());

        // Case 2: `sandbox_permissions` is present but set to an *empty array*.
        let toml_source_with_empty = "sandbox_permissions = []";
        let cfg_with_empty: ConfigToml = toml::from_str(toml_source_with_empty)
            .expect("TOML deserialization with empty array should succeed");
        assert_eq!(Some(vec![]), cfg_with_empty.sandbox_permissions);

        // Case 3: `sandbox_permissions` contains a non-empty list of valid values.
        let toml_source_with_values = r#"
            sandbox_permissions = ["disk-full-read-access", "network-full-access"]
        "#;
        let cfg_with_values: ConfigToml = toml::from_str(toml_source_with_values)
            .expect("TOML deserialization with valid permissions should succeed");

        assert_eq!(
            Some(vec![
                SandboxPermission::DiskFullReadAccess,
                SandboxPermission::NetworkFullAccess
            ]),
            cfg_with_values.sandbox_permissions
        );
    }

    /// Deserializing a TOML string containing an *invalid* permission should
    /// fail with a helpful error rather than silently defaulting or
    /// succeeding.
    #[test]
    fn test_sandbox_permissions_illegal_value() {
        let toml_bad = r#"sandbox_permissions = ["not-a-real-permission"]"#;

        let err = toml::from_str::<ConfigToml>(toml_bad)
            .expect_err("Deserialization should fail for invalid permission");

        // Make sure the error message contains the invalid value so users have
        // useful feedback.
        let msg = err.to_string();
        assert!(msg.contains("not-a-real-permission"));
    }

    /// Users can specify config values at multiple levels that have the
    /// following precedence:
    ///
    /// 1. custom command-line argument, e.g. `--model o3`
    /// 2. as part of a profile, where the `--profile` is specified via a CLI
    ///    (or in the config file itelf)
    /// 3. as an entry in `config.toml`, e.g. `model = "o3"`
    /// 4. the default value for a required field defined in code, e.g.,
    ///    `crate::flags::OPENAI_DEFAULT_MODEL`
    ///
    /// Note that profiles are the recommended way to specify a group of
    /// configuration options together.
    #[test]
    fn test_precedence_overrides_then_profile_then_config_toml() -> std::io::Result<()> {
        let toml = r#"
model = "o3"
approval_policy = "unless-allow-listed"
sandbox_permissions = ["disk-full-read-access"]
disable_response_storage = false

# Can be used to determine which profile to use if not specified by
# `ConfigOverrides`.
profile = "gpt3"

[model_providers.openai-chat-completions]
name = "OpenAI using Chat Completions"
base_url = "https://api.openai.com/v1"
env_key = "OPENAI_API_KEY"
wire_api = "chat"

[profiles.o3]
model = "o3"
model_provider = "openai"
approval_policy = "never"

[profiles.gpt3]
model = "gpt-3.5-turbo"
model_provider = "openai-chat-completions"

[profiles.zdr]
model = "o3"
model_provider = "openai"
approval_policy = "on-failure"
disable_response_storage = true
"#;

        let cfg: ConfigToml = toml::from_str(toml).expect("TOML deserialization should succeed");

        // Use a temporary directory for the cwd so it does not contain an
        // AGENTS.md file.
        let cwd_temp_dir = TempDir::new().unwrap();
        let cwd = cwd_temp_dir.path().to_path_buf();
        // Make it look like a Git repo so it does not search for AGENTS.md in
        // a parent folder, either.
        std::fs::write(cwd.join(".git"), "gitdir: nowhere")?;

        let openai_chat_completions_provider = ModelProviderInfo {
            name: "OpenAI using Chat Completions".to_string(),
            base_url: "https://api.openai.com/v1".to_string(),
            env_key: Some("OPENAI_API_KEY".to_string()),
            wire_api: crate::WireApi::Chat,
            env_key_instructions: None,
        };
        let model_provider_map = {
            let mut model_provider_map = built_in_model_providers();
            model_provider_map.insert(
                "openai-chat-completions".to_string(),
                openai_chat_completions_provider.clone(),
            );
            model_provider_map
        };

        let openai_provider = model_provider_map
            .get("openai")
            .expect("openai provider should exist")
            .clone();

        let o3_profile_overrides = ConfigOverrides {
            config_profile: Some("o3".to_string()),
            cwd: Some(cwd.clone()),
            ..Default::default()
        };
        let o3_profile_config =
            Config::load_from_base_config_with_overrides(cfg.clone(), o3_profile_overrides, None)?;
        assert_eq!(
            Config {
                model: "o3".to_string(),
                model_provider_id: "openai".to_string(),
                model_provider: openai_provider.clone(),
                approval_policy: AskForApproval::Never,
                sandbox_policy: SandboxPolicy::new_read_only_policy(),
                disable_response_storage: false,
                instructions: None,
                notify: None,
                cwd: cwd.clone(),
                mcp_servers: HashMap::new(),
                model_providers: model_provider_map.clone(),
                project_doc_max_bytes: PROJECT_DOC_MAX_BYTES,
            },
            o3_profile_config
        );

        let gpt3_profile_overrides = ConfigOverrides {
            config_profile: Some("gpt3".to_string()),
            cwd: Some(cwd.clone()),
            ..Default::default()
        };
        let gpt3_profile_config = Config::load_from_base_config_with_overrides(
            cfg.clone(),
            gpt3_profile_overrides,
            None,
        )?;
        let expected_gpt3_profile_config = Config {
            model: "gpt-3.5-turbo".to_string(),
            model_provider_id: "openai-chat-completions".to_string(),
            model_provider: openai_chat_completions_provider,
            approval_policy: AskForApproval::UnlessAllowListed,
            sandbox_policy: SandboxPolicy::new_read_only_policy(),
            disable_response_storage: false,
            instructions: None,
            notify: None,
            cwd: cwd.clone(),
            mcp_servers: HashMap::new(),
            model_providers: model_provider_map.clone(),
            project_doc_max_bytes: PROJECT_DOC_MAX_BYTES,
        };
        assert_eq!(expected_gpt3_profile_config.clone(), gpt3_profile_config);

        // Verify that loading without specifying a profile in ConfigOverrides
        // uses the default profile from the config file.
        let default_profile_overrides = ConfigOverrides {
            cwd: Some(cwd.clone()),
            ..Default::default()
        };
        let default_profile_config = Config::load_from_base_config_with_overrides(
            cfg.clone(),
            default_profile_overrides,
            None,
        )?;
        assert_eq!(expected_gpt3_profile_config, default_profile_config);

        let zdr_profile_overrides = ConfigOverrides {
            config_profile: Some("zdr".to_string()),
            cwd: Some(cwd.clone()),
            ..Default::default()
        };
        let zdr_profile_config =
            Config::load_from_base_config_with_overrides(cfg.clone(), zdr_profile_overrides, None)?;
        assert_eq!(
            Config {
                model: "o3".to_string(),
                model_provider_id: "openai".to_string(),
                model_provider: openai_provider.clone(),
                approval_policy: AskForApproval::OnFailure,
                sandbox_policy: SandboxPolicy::new_read_only_policy(),
                disable_response_storage: true,
                instructions: None,
                notify: None,
                cwd: cwd.clone(),
                mcp_servers: HashMap::new(),
                model_providers: model_provider_map.clone(),
                project_doc_max_bytes: PROJECT_DOC_MAX_BYTES,
            },
            zdr_profile_config
        );

        Ok(())
    }
}