2025-10-27 19:41:49 -07:00
|
|
|
use crate::auth::AuthCredentialsStoreMode;
|
2025-10-30 10:28:32 +00:00
|
|
|
use crate::config::types::DEFAULT_OTEL_ENVIRONMENT;
|
|
|
|
|
use crate::config::types::History;
|
|
|
|
|
use crate::config::types::McpServerConfig;
|
|
|
|
|
use crate::config::types::Notice;
|
|
|
|
|
use crate::config::types::Notifications;
|
|
|
|
|
use crate::config::types::OtelConfig;
|
|
|
|
|
use crate::config::types::OtelConfigToml;
|
|
|
|
|
use crate::config::types::OtelExporterKind;
|
|
|
|
|
use crate::config::types::ReasoningSummaryFormat;
|
|
|
|
|
use crate::config::types::SandboxWorkspaceWrite;
|
|
|
|
|
use crate::config::types::ShellEnvironmentPolicy;
|
|
|
|
|
use crate::config::types::ShellEnvironmentPolicyToml;
|
|
|
|
|
use crate::config::types::Tui;
|
|
|
|
|
use crate::config::types::UriBasedFileOpener;
|
2025-10-03 13:02:26 -07:00
|
|
|
use crate::config_loader::LoadedConfigLayers;
|
2025-10-30 10:28:32 +00:00
|
|
|
use crate::config_loader::load_config_as_toml;
|
2025-10-03 13:02:26 -07:00
|
|
|
use crate::config_loader::load_config_layers_with_overrides;
|
|
|
|
|
use crate::config_loader::merge_toml_values;
|
2025-10-14 18:50:00 +01:00
|
|
|
use crate::features::Feature;
|
|
|
|
|
use crate::features::FeatureOverrides;
|
|
|
|
|
use crate::features::Features;
|
|
|
|
|
use crate::features::FeaturesToml;
|
2025-08-22 13:54:51 -07:00
|
|
|
use crate::git_info::resolve_root_git_project_for_trust;
|
2025-08-04 23:50:03 -07:00
|
|
|
use crate::model_family::ModelFamily;
|
2025-09-14 15:45:15 -07:00
|
|
|
use crate::model_family::derive_default_model_family;
|
2025-08-04 23:50:03 -07:00
|
|
|
use crate::model_family::find_family_for_model;
|
2025-05-07 17:38:28 -07:00
|
|
|
use crate::model_provider_info::ModelProviderInfo;
|
|
|
|
|
use crate::model_provider_info::built_in_model_providers;
|
feat: show number of tokens remaining in UI (#1388)
When using the OpenAI Responses API, we now record the `usage` field for
a `"response.completed"` event, which includes metrics about the number
of tokens consumed. We also introduce `openai_model_info.rs`, which
includes current data about the most common OpenAI models available via
the API (specifically `context_window` and `max_output_tokens`). If
Codex does not recognize the model, you can set `model_context_window`
and `model_max_output_tokens` explicitly in `config.toml`.
We then introduce a new event type to `protocol.rs`, `TokenCount`,
which includes the `TokenUsage` for the most recent turn.
Finally, we update the TUI to record the running sum of tokens used so
the percentage of available context window remaining can be reported via
the placeholder text for the composer:

We could certainly get much fancier with this (such as reporting the
estimated cost of the conversation), but for now, we are just trying to
achieve feature parity with the TypeScript CLI.
Though arguably this improves upon the TypeScript CLI, as the TypeScript
CLI uses heuristics to estimate the number of tokens used rather than
using the `usage` information directly:
https://github.com/openai/codex/blob/296996d74e345b1b05d8c3451a06ace21c5ada96/codex-cli/src/utils/approximate-tokens-used.ts#L3-L16
Fixes https://github.com/openai/codex/issues/1242
2025-06-25 23:31:11 -07:00
|
|
|
use crate::openai_model_info::get_model_info;
|
2025-10-15 17:46:01 +01:00
|
|
|
use crate::project_doc::DEFAULT_PROJECT_DOC_FILENAME;
|
|
|
|
|
use crate::project_doc::LOCAL_PROJECT_DOC_FILENAME;
|
2025-04-27 21:47:50 -07:00
|
|
|
use crate::protocol::AskForApproval;
|
|
|
|
|
use crate::protocol::SandboxPolicy;
|
fix: remove mcp-types from app server protocol (#4537)
We continue the separation between `codex app-server` and `codex
mcp-server`.
In particular, we introduce a new crate, `codex-app-server-protocol`,
and migrate `codex-rs/protocol/src/mcp_protocol.rs` into it, renaming it
`codex-rs/app-server-protocol/src/protocol.rs`.
Because `ConversationId` was defined in `mcp_protocol.rs`, we move it
into its own file, `codex-rs/protocol/src/conversation_id.rs`, and
because it is referenced in a ton of places, we have to touch a lot of
files as part of this PR.
We also decide to get away from proper JSON-RPC 2.0 semantics, so we
also introduce `codex-rs/app-server-protocol/src/jsonrpc_lite.rs`, which
is basically the same `JSONRPCMessage` type defined in `mcp-types`
except with all of the `"jsonrpc": "2.0"` removed.
Getting rid of `"jsonrpc": "2.0"` makes our serialization logic
considerably simpler, as we can lean heavier on serde to serialize
directly into the wire format that we use now.
2025-09-30 19:16:26 -07:00
|
|
|
use codex_app_server_protocol::Tools;
|
|
|
|
|
use codex_app_server_protocol::UserSavedConfig;
|
2025-10-20 08:50:54 -07:00
|
|
|
use codex_protocol::config_types::ForcedLoginMethod;
|
2025-08-18 11:50:17 -07:00
|
|
|
use codex_protocol::config_types::ReasoningEffort;
|
|
|
|
|
use codex_protocol::config_types::ReasoningSummary;
|
2025-08-18 09:36:57 -07:00
|
|
|
use codex_protocol::config_types::SandboxMode;
|
2025-09-03 12:20:31 -07:00
|
|
|
use codex_protocol::config_types::Verbosity;
|
2025-10-07 19:39:32 -07:00
|
|
|
use codex_rmcp_client::OAuthCredentialsStoreMode;
|
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
As stated in `codex-rs/README.md`:
Today, Codex CLI is written in TypeScript and requires Node.js 22+ to
run it. For a number of users, this runtime requirement inhibits
adoption: they would be better served by a standalone executable. As
maintainers, we want Codex to run efficiently in a wide range of
environments with minimal overhead. We also want to take advantage of
operating system-specific APIs to provide better sandboxing, where
possible.
To that end, we are moving forward with a Rust implementation of Codex
CLI contained in this folder, which has the following benefits:
- The CLI compiles to small, standalone, platform-specific binaries.
- Can make direct, native calls to
[seccomp](https://man7.org/linux/man-pages/man2/seccomp.2.html) and
[landlock](https://man7.org/linux/man-pages/man7/landlock.7.html) in
order to support sandboxing on Linux.
- No runtime garbage collection, resulting in lower memory consumption
and better, more predictable performance.
Currently, the Rust implementation is materially behind the TypeScript
implementation in functionality, so continue to use the TypeScript
implementation for the time being. We will publish native executables via
GitHub Releases as soon as we feel the Rust version is usable.
2025-04-24 13:31:40 -07:00
|
|
|
use dirs::home_dir;
|
2025-10-18 22:13:53 -07:00
|
|
|
use dunce::canonicalize;
|
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
As stated in `codex-rs/README.md`:
Today, Codex CLI is written in TypeScript and requires Node.js 22+ to
run it. For a number of users, this runtime requirement inhibits
adoption: they would be better served by a standalone executable. As
maintainers, we want Codex to run efficiently in a wide range of
environments with minimal overhead. We also want to take advantage of
operating system-specific APIs to provide better sandboxing, where
possible.
To that end, we are moving forward with a Rust implementation of Codex
CLI contained in this folder, which has the following benefits:
- The CLI compiles to small, standalone, platform-specific binaries.
- Can make direct, native calls to
[seccomp](https://man7.org/linux/man-pages/man2/seccomp.2.html) and
[landlock](https://man7.org/linux/man-pages/man7/landlock.7.html) in
order to support sandboxing on Linux.
- No runtime garbage collection, resulting in lower memory consumption
and better, more predictable performance.
Currently, the Rust implementation is materially behind the TypeScript
implementation in functionality, so continue to use the TypeScript
implmentation for the time being. We will publish native executables via
GitHub Releases as soon as we feel the Rust version is usable.
2025-04-24 13:31:40 -07:00
|
|
|
use serde::Deserialize;
|
2025-10-16 11:23:38 -07:00
|
|
|
use similar::DiffableStr;
|
2025-09-14 21:30:56 -07:00
|
|
|
use std::collections::BTreeMap;
|
feat: support mcp_servers in config.toml (#829)
This adds initial support for MCP servers in the style of Claude Desktop
and Cursor. Note this PR is the bare minimum to get things working end
to end: all configured MCP servers are launched every time Codex is run,
there is no recovery for MCP servers that crash, etc.
(Also, I took some shortcuts to change some fields of `Session` to be
`pub(crate)`, which also means there are circular deps between
`codex.rs` and `mcp_tool_call.rs`, but I will clean that up in a
subsequent PR.)
`codex-rs/README.md` is updated as part of this PR to explain how to use
this feature. There is a bit of plumbing to route the new settings from
`Config` to the business logic in `codex.rs`. The most significant
chunks for new code are in `mcp_connection_manager.rs` (which defines
the `McpConnectionManager` struct) and `mcp_tool_call.rs`, which is
responsible for tool calls.
This PR also introduces new `McpToolCallBegin` and `McpToolCallEnd`
event types to the protocol, but does not add any handlers for them.
(See https://github.com/openai/codex/pull/836 for initial usage.)
To test, I added the following to my `~/.codex/config.toml`:
```toml
# Local build of https://github.com/hideya/mcp-server-weather-js
[mcp_servers.weather]
command = "/Users/mbolin/code/mcp-server-weather-js/dist/index.js"
args = []
```
And then I ran the following:
```
codex-rs$ cargo run --bin codex exec 'what is the weather in san francisco'
[2025-05-06T22:40:05] Task started: 1
[2025-05-06T22:40:18] Agent message: Here’s the latest National Weather Service forecast for San Francisco (downtown, near 37.77° N, 122.42° W):
This Afternoon (Tue):
• Sunny, high near 69 °F
• West-southwest wind around 12 mph
Tonight:
• Partly cloudy, low around 52 °F
• SW wind 7–10 mph
...
```
Note that Codex itself is not able to make network calls, so it would
not normally be able to get live weather information like this. However,
the weather MCP is [currently] not run under the Codex sandbox, so it is
able to hit `api.weather.gov` and fetch current weather information.
---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed
with [ReviewStack](https://reviewstack.dev/openai/codex/pull/829).
* #836
* __->__ #829
2025-05-06 15:47:59 -07:00
|
|
|
use std::collections::HashMap;
|
2025-10-07 20:21:37 -07:00
|
|
|
use std::io::ErrorKind;
|
2025-05-13 16:52:52 -07:00
|
|
|
use std::path::Path;
|
2025-04-27 21:47:50 -07:00
|
|
|
use std::path::PathBuf;
|
2025-10-07 20:21:37 -07:00
|
|
|
|
2025-10-30 10:28:32 +00:00
|
|
|
use crate::config::profile::ConfigProfile;
|
feat: add support for -c/--config to override individual config items (#1137)
This PR introduces support for `-c`/`--config` so users can override
individual config values on the command line using `--config
name=value`. Example:
```
codex --config model=o4-mini
```
Making it possible to set arbitrary config values on the command line
results in a more flexible configuration scheme and makes it easier to
provide single-line examples that can be copy-pasted from documentation.
Effectively, it means there are four levels of configuration for some
values:
- Default value (e.g., `model` currently defaults to `o4-mini`)
- Value in `config.toml` (e.g., user could override the default to be
`model = "o3"` in their `config.toml`)
- Specifying `-c` or `--config` to override `model` (e.g., user can
include `-c model=o3` in their list of args to Codex)
- If available, a config-specific flag can be used, which takes
precedence over `-c` (e.g., user can specify `--model o3` in their list
of args to Codex)
Now that it is possible to specify anything that could be configured in
`config.toml` on the command line using `-c`, we do not need to have a
custom flag for every possible config option (which can clutter the
output of `--help`). To that end, as part of this PR, we drop support
for the `--disable-response-storage` flag, as users can now specify `-c
disable_response_storage=true` to get the equivalent functionality.
Under the hood, this works by loading the `config.toml` into a
`toml::Value`. Then for each `key=value`, we create a small synthetic
TOML file with `value` so that we can run the TOML parser to get the
equivalent `toml::Value`. We then parse `key` to determine the point in
the original `toml::Value` to do the insert/replace. Once all of the
overrides from `-c` args have been applied, the `toml::Value` is
deserialized into a `ConfigToml` and then the `ConfigOverrides` are
applied, as before.
2025-05-27 23:11:44 -07:00
|
|
|
use toml::Value as TomlValue;
|
2025-08-07 09:27:38 -07:00
|
|
|
use toml_edit::DocumentMut;
|
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
As stated in `codex-rs/README.md`:
Today, Codex CLI is written in TypeScript and requires Node.js 22+ to
run it. For a number of users, this runtime requirement inhibits
adoption: they would be better served by a standalone executable. As
maintainers, we want Codex to run efficiently in a wide range of
environments with minimal overhead. We also want to take advantage of
operating system-specific APIs to provide better sandboxing, where
possible.
To that end, we are moving forward with a Rust implementation of Codex
CLI contained in this folder, which has the following benefits:
- The CLI compiles to small, standalone, platform-specific binaries.
- Can make direct, native calls to
[seccomp](https://man7.org/linux/man-pages/man2/seccomp.2.html) and
[landlock](https://man7.org/linux/man-pages/man7/landlock.7.html) in
order to support sandboxing on Linux.
- No runtime garbage collection, resulting in lower memory consumption
and better, more predictable performance.
Currently, the Rust implementation is materially behind the TypeScript
implementation in functionality, so continue to use the TypeScript
implmentation for the time being. We will publish native executables via
GitHub Releases as soon as we feel the Rust version is usable.
2025-04-24 13:31:40 -07:00
|
|
|
|
2025-10-30 10:28:32 +00:00
|
|
|
// Submodules of the `config` module.
pub mod edit; // config-editing helpers; presumably backed by `toml_edit::DocumentMut` (imported above) — confirm
pub mod profile; // defines `ConfigProfile` (imported above as `crate::config::profile::ConfigProfile`)
pub mod types; // shared config value types (History, McpServerConfig, OtelConfig, Tui, … — see imports above)
|
|
|
|
|
|
2025-10-03 14:00:03 -07:00
|
|
|
// Default model identifier, selected at compile time by target OS:
// "gpt-5" on Windows and "gpt-5-codex" everywhere else.
// NOTE(review): the reason for the Windows-specific default is not visible in
// this file — presumably gpt-5-codex is not (fully) supported on Windows; confirm.
#[cfg(target_os = "windows")]
pub const OPENAI_DEFAULT_MODEL: &str = "gpt-5";

#[cfg(not(target_os = "windows"))]
pub const OPENAI_DEFAULT_MODEL: &str = "gpt-5-codex";
|
2025-09-16 13:36:51 -07:00
|
|
|
/// Default model used for review sessions (the documented default of
/// `Config::review_model`).
const OPENAI_DEFAULT_REVIEW_MODEL: &str = "gpt-5-codex";
|
2025-09-15 08:17:13 -07:00
|
|
|
/// The `gpt-5-codex` model identifier, exported for callers that need to name
/// it explicitly.
/// NOTE(review): the `MEDIUM` in the constant's name suggests a
/// medium-reasoning-effort variant, but nothing here establishes that — confirm.
pub const GPT_5_CODEX_MEDIUM_MODEL: &str = "gpt-5-codex";
|
2025-08-07 10:13:13 -07:00
|
|
|
|
2025-05-10 17:52:59 -07:00
|
|
|
/// Maximum number of bytes of the documentation that will be embedded. Larger
/// files are *silently truncated* to this size so we do not take up too much of
/// the context window.
pub(crate) const PROJECT_DOC_MAX_BYTES: usize = 32 * 1024; // 32 KiB
|
|
|
|
|
|
2025-09-10 13:53:46 -07:00
|
|
|
/// File name of the user configuration file.
/// NOTE(review): presumably resolved relative to the Codex home directory
/// (e.g. `~/.codex/config.toml`) — confirm against the `config_loader` module.
pub(crate) const CONFIG_TOML_FILE: &str = "config.toml";
|
2025-08-07 09:27:38 -07:00
|
|
|
|
2025-04-27 21:47:50 -07:00
|
|
|
/// Application configuration loaded from disk and merged with overrides.
|
2025-05-13 16:52:52 -07:00
|
|
|
#[derive(Debug, Clone, PartialEq)]
|
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
As stated in `codex-rs/README.md`:
Today, Codex CLI is written in TypeScript and requires Node.js 22+ to
run it. For a number of users, this runtime requirement inhibits
adoption: they would be better served by a standalone executable. As
maintainers, we want Codex to run efficiently in a wide range of
environments with minimal overhead. We also want to take advantage of
operating system-specific APIs to provide better sandboxing, where
possible.
To that end, we are moving forward with a Rust implementation of Codex
CLI contained in this folder, which has the following benefits:
- The CLI compiles to small, standalone, platform-specific binaries.
- Can make direct, native calls to
[seccomp](https://man7.org/linux/man-pages/man2/seccomp.2.html) and
[landlock](https://man7.org/linux/man-pages/man7/landlock.7.html) in
order to support sandboxing on Linux.
- No runtime garbage collection, resulting in lower memory consumption
and better, more predictable performance.
Currently, the Rust implementation is materially behind the TypeScript
implementation in functionality, so continue to use the TypeScript
implmentation for the time being. We will publish native executables via
GitHub Releases as soon as we feel the Rust version is usable.
2025-04-24 13:31:40 -07:00
|
|
|
pub struct Config {
|
2025-04-27 21:47:50 -07:00
|
|
|
/// Optional override of model selection.
|
|
|
|
|
pub model: String,
|
fix: overhaul SandboxPolicy and config loading in Rust (#732)
Previous to this PR, `SandboxPolicy` was a bit difficult to work with:
https://github.com/openai/codex/blob/237f8a11e11fdcc793a09e787e48215676d9b95b/codex-rs/core/src/protocol.rs#L98-L108
Specifically:
* It was an `enum` and therefore options were mutually exclusive as
opposed to additive.
* It defined things in terms of what the agent _could not_ do as opposed
to what they _could_ do. This made things hard to support because we
would prefer to build up a sandbox config by starting with something
extremely restrictive and only granting permissions for things the user
has explicitly allowed.
This PR changes things substantially by redefining the policy in terms
of two concepts:
* A `SandboxPermission` enum that defines permissions that can be
granted to the agent/sandbox.
* A `SandboxPolicy` that internally stores a `Vec<SandboxPermission>`,
but externally exposes a simpler API that can be used to configure
Seatbelt/Landlock.
Previous to this PR, we supported a `--sandbox` flag that effectively
mapped to an enum value in `SandboxPolicy`. Though now that
`SandboxPolicy` is a wrapper around `Vec<SandboxPermission>`, the single
`--sandbox` flag no longer makes sense. While I could have turned it
into a flag that the user can specify multiple times, I think the
current values to use with such a flag are long and potentially messy,
so for the moment, I have dropped support for `--sandbox` altogether and
we can bring it back once we have figured out the naming thing.
Since `--sandbox` is gone, users now have to specify `--full-auto` to
get a sandbox that allows writes in `cwd`. Admittedly, there is no clean
way to specify the equivalent of `--full-auto` in your `config.toml`
right now, so we will have to revisit that, as well.
Because `Config` presents a `SandboxPolicy` field and `SandboxPolicy`
changed considerably, I had to overhaul how config loading works, as
well. There are now two distinct concepts, `ConfigToml` and `Config`:
* `ConfigToml` is the deserialization of `~/.codex/config.toml`. As one
might expect, every field is `Optional` and it is `#[derive(Deserialize,
Default)]`. Consistent use of `Optional` makes it clear what the user
has specified explicitly.
* `Config` is the "normalized config" and is produced by merging
`ConfigToml` with `ConfigOverrides`. Where `ConfigToml` contains a raw
`Option<Vec<SandboxPermission>>`, `Config` presents only the final
`SandboxPolicy`.
The changes to `core/src/exec.rs` and `core/src/linux.rs` merit extra
special attention to ensure we are faithfully mapping the
`SandboxPolicy` to the Seatbelt and Landlock configs, respectively.
Also, take note that `core/src/seatbelt_readonly_policy.sbpl` has been
renamed to `codex-rs/core/src/seatbelt_base_policy.sbpl` and that
`(allow file-read*)` has been removed from the `.sbpl` file as now this
is added to the policy in `core/src/exec.rs` when
`sandbox_policy.has_full_disk_read_access()` is `true`.
2025-04-29 15:01:16 -07:00
|
|
|
|
2025-09-22 20:10:52 -07:00
|
|
|
/// Model used specifically for review sessions. Defaults to "gpt-5-codex".
|
Review Mode (Core) (#3401)
## 📝 Review Mode -- Core
This PR introduces the Core implementation for Review mode:
- New op `Op::Review { prompt: String }:` spawns a child review task
with isolated context, a review‑specific system prompt, and a
`Config.review_model`.
- `EnteredReviewMode`: emitted when the child review session starts.
Every event from this point onwards reflects the review session.
- `ExitedReviewMode(Option<ReviewOutputEvent>)`: emitted when the review
finishes or is interrupted, with optional structured findings:
```json
{
"findings": [
{
"title": "<≤ 80 chars, imperative>",
"body": "<valid Markdown explaining *why* this is a problem; cite files/lines/functions>",
"confidence_score": <float 0.0-1.0>,
"priority": <int 0-3>,
"code_location": {
"absolute_file_path": "<file path>",
"line_range": {"start": <int>, "end": <int>}
}
}
],
"overall_correctness": "patch is correct" | "patch is incorrect",
"overall_explanation": "<1-3 sentence explanation justifying the overall_correctness verdict>",
"overall_confidence_score": <float 0.0-1.0>
}
```
## Questions
### Why separate out its own message history?
We want the review thread to match the training of our review models as
much as possible -- that means using a custom prompt, removing user
instructions, and starting a clean chat history.
We also want to make sure the review thread doesn't leak into the parent
thread.
### Why do this as a mode, vs. sub-agents?
1. We want review to be a synchronous task, so it's fine for now to do a
bespoke implementation.
2. We're still unclear about the final structure for sub-agents. We'd
prefer to land this quickly and then refactor into sub-agents without
rushing that implementation.
2025-09-12 16:25:10 -07:00
|
|
|
pub review_model: String,
|
|
|
|
|
|
2025-08-04 23:50:03 -07:00
|
|
|
pub model_family: ModelFamily,
|
|
|
|
|
|
feat: show number of tokens remaining in UI (#1388)
When using the OpenAI Responses API, we now record the `usage` field for
a `"response.completed"` event, which includes metrics about the number
of tokens consumed. We also introduce `openai_model_info.rs`, which
includes current data about the most common OpenAI models available via
the API (specifically `context_window` and `max_output_tokens`). If
Codex does not recognize the model, you can set `model_context_window`
and `model_max_output_tokens` explicitly in `config.toml`.
When then introduce a new event type to `protocol.rs`, `TokenCount`,
which includes the `TokenUsage` for the most recent turn.
Finally, we update the TUI to record the running sum of tokens used so
the percentage of available context window remaining can be reported via
the placeholder text for the composer:

We could certainly get much fancier with this (such as reporting the
estimated cost of the conversation), but for now, we are just trying to
achieve feature parity with the TypeScript CLI.
Though arguably this improves upon the TypeScript CLI, as the TypeScript
CLI uses heuristics to estimate the number of tokens used rather than
using the `usage` information directly:
https://github.com/openai/codex/blob/296996d74e345b1b05d8c3451a06ace21c5ada96/codex-cli/src/utils/approximate-tokens-used.ts#L3-L16
Fixes https://github.com/openai/codex/issues/1242
2025-06-25 23:31:11 -07:00
|
|
|
/// Size of the context window for the model, in tokens.
|
2025-10-20 11:29:49 -07:00
|
|
|
pub model_context_window: Option<i64>,
|
feat: show number of tokens remaining in UI (#1388)
When using the OpenAI Responses API, we now record the `usage` field for
a `"response.completed"` event, which includes metrics about the number
of tokens consumed. We also introduce `openai_model_info.rs`, which
includes current data about the most common OpenAI models available via
the API (specifically `context_window` and `max_output_tokens`). If
Codex does not recognize the model, you can set `model_context_window`
and `model_max_output_tokens` explicitly in `config.toml`.
When then introduce a new event type to `protocol.rs`, `TokenCount`,
which includes the `TokenUsage` for the most recent turn.
Finally, we update the TUI to record the running sum of tokens used so
the percentage of available context window remaining can be reported via
the placeholder text for the composer:

We could certainly get much fancier with this (such as reporting the
estimated cost of the conversation), but for now, we are just trying to
achieve feature parity with the TypeScript CLI.
Though arguably this improves upon the TypeScript CLI, as the TypeScript
CLI uses heuristics to estimate the number of tokens used rather than
using the `usage` information directly:
https://github.com/openai/codex/blob/296996d74e345b1b05d8c3451a06ace21c5ada96/codex-cli/src/utils/approximate-tokens-used.ts#L3-L16
Fixes https://github.com/openai/codex/issues/1242
2025-06-25 23:31:11 -07:00
|
|
|
|
|
|
|
|
/// Maximum number of output tokens.
|
2025-10-20 11:29:49 -07:00
|
|
|
pub model_max_output_tokens: Option<i64>,
|
feat: show number of tokens remaining in UI (#1388)
When using the OpenAI Responses API, we now record the `usage` field for
a `"response.completed"` event, which includes metrics about the number
of tokens consumed. We also introduce `openai_model_info.rs`, which
includes current data about the most common OpenAI models available via
the API (specifically `context_window` and `max_output_tokens`). If
Codex does not recognize the model, you can set `model_context_window`
and `model_max_output_tokens` explicitly in `config.toml`.
When then introduce a new event type to `protocol.rs`, `TokenCount`,
which includes the `TokenUsage` for the most recent turn.
Finally, we update the TUI to record the running sum of tokens used so
the percentage of available context window remaining can be reported via
the placeholder text for the composer:

We could certainly get much fancier with this (such as reporting the
estimated cost of the conversation), but for now, we are just trying to
achieve feature parity with the TypeScript CLI.
Though arguably this improves upon the TypeScript CLI, as the TypeScript
CLI uses heuristics to estimate the number of tokens used rather than
using the `usage` information directly:
https://github.com/openai/codex/blob/296996d74e345b1b05d8c3451a06ace21c5ada96/codex-cli/src/utils/approximate-tokens-used.ts#L3-L16
Fixes https://github.com/openai/codex/issues/1242
2025-06-25 23:31:11 -07:00
|
|
|
|
2025-09-12 13:07:10 -07:00
|
|
|
/// Token usage threshold triggering auto-compaction of conversation history.
|
|
|
|
|
pub model_auto_compact_token_limit: Option<i64>,
|
|
|
|
|
|
2025-05-08 21:46:06 -07:00
|
|
|
/// Key into the model_providers map that specifies which provider to use.
|
|
|
|
|
pub model_provider_id: String,
|
|
|
|
|
|
2025-05-07 17:38:28 -07:00
|
|
|
/// Info needed to make an API request to the model.
|
|
|
|
|
pub model_provider: ModelProviderInfo,
|
|
|
|
|
|
fix: overhaul SandboxPolicy and config loading in Rust (#732)
Previous to this PR, `SandboxPolicy` was a bit difficult to work with:
https://github.com/openai/codex/blob/237f8a11e11fdcc793a09e787e48215676d9b95b/codex-rs/core/src/protocol.rs#L98-L108
Specifically:
* It was an `enum` and therefore options were mutually exclusive as
opposed to additive.
* It defined things in terms of what the agent _could not_ do as opposed
to what they _could_ do. This made things hard to support because we
would prefer to build up a sandbox config by starting with something
extremely restrictive and only granting permissions for things the user
as explicitly allowed.
This PR changes things substantially by redefining the policy in terms
of two concepts:
* A `SandboxPermission` enum that defines permissions that can be
granted to the agent/sandbox.
* A `SandboxPolicy` that internally stores a `Vec<SandboxPermission>`,
but externally exposes a simpler API that can be used to configure
Seatbelt/Landlock.
Previous to this PR, we supported a `--sandbox` flag that effectively
mapped to an enum value in `SandboxPolicy`. Though now that
`SandboxPolicy` is a wrapper around `Vec<SandboxPermission>`, the single
`--sandbox` flag no longer makes sense. While I could have turned it
into a flag that the user can specify multiple times, I think the
current values to use with such a flag are long and potentially messy,
so for the moment, I have dropped support for `--sandbox` altogether and
we can bring it back once we have figured out the naming thing.
Since `--sandbox` is gone, users now have to specify `--full-auto` to
get a sandbox that allows writes in `cwd`. Admittedly, there is no clean
way to specify the equivalent of `--full-auto` in your `config.toml`
right now, so we will have to revisit that, as well.
Because `Config` presents a `SandboxPolicy` field and `SandboxPolicy`
changed considerably, I had to overhaul how config loading works, as
well. There are now two distinct concepts, `ConfigToml` and `Config`:
* `ConfigToml` is the deserialization of `~/.codex/config.toml`. As one
might expect, every field is `Optional` and it is `#[derive(Deserialize,
Default)]`. Consistent use of `Optional` makes it clear what the user
has specified explicitly.
* `Config` is the "normalized config" and is produced by merging
`ConfigToml` with `ConfigOverrides`. Where `ConfigToml` contains a raw
`Option<Vec<SandboxPermission>>`, `Config` presents only the final
`SandboxPolicy`.
The changes to `core/src/exec.rs` and `core/src/linux.rs` merit extra
special attention to ensure we are faithfully mapping the
`SandboxPolicy` to the Seatbelt and Landlock configs, respectively.
Also, take note that `core/src/seatbelt_readonly_policy.sbpl` has been
renamed to `codex-rs/core/src/seatbelt_base_policy.sbpl` and that
`(allow file-read*)` has been removed from the `.sbpl` file as now this
is added to the policy in `core/src/exec.rs` when
`sandbox_policy.has_full_disk_read_access()` is `true`.
2025-04-29 15:01:16 -07:00
|
|
|
/// Approval policy for executing commands.
|
2025-04-27 21:47:50 -07:00
|
|
|
pub approval_policy: AskForApproval,
|
fix: overhaul SandboxPolicy and config loading in Rust (#732)
Previous to this PR, `SandboxPolicy` was a bit difficult to work with:
https://github.com/openai/codex/blob/237f8a11e11fdcc793a09e787e48215676d9b95b/codex-rs/core/src/protocol.rs#L98-L108
Specifically:
* It was an `enum` and therefore options were mutually exclusive as
opposed to additive.
* It defined things in terms of what the agent _could not_ do as opposed
to what they _could_ do. This made things hard to support because we
would prefer to build up a sandbox config by starting with something
extremely restrictive and only granting permissions for things the user
as explicitly allowed.
This PR changes things substantially by redefining the policy in terms
of two concepts:
* A `SandboxPermission` enum that defines permissions that can be
granted to the agent/sandbox.
* A `SandboxPolicy` that internally stores a `Vec<SandboxPermission>`,
but externally exposes a simpler API that can be used to configure
Seatbelt/Landlock.
Previous to this PR, we supported a `--sandbox` flag that effectively
mapped to an enum value in `SandboxPolicy`. Though now that
`SandboxPolicy` is a wrapper around `Vec<SandboxPermission>`, the single
`--sandbox` flag no longer makes sense. While I could have turned it
into a flag that the user can specify multiple times, I think the
current values to use with such a flag are long and potentially messy,
so for the moment, I have dropped support for `--sandbox` altogether and
we can bring it back once we have figured out the naming thing.
Since `--sandbox` is gone, users now have to specify `--full-auto` to
get a sandbox that allows writes in `cwd`. Admittedly, there is no clean
way to specify the equivalent of `--full-auto` in your `config.toml`
right now, so we will have to revisit that, as well.
Because `Config` presents a `SandboxPolicy` field and `SandboxPolicy`
changed considerably, I had to overhaul how config loading works, as
well. There are now two distinct concepts, `ConfigToml` and `Config`:
* `ConfigToml` is the deserialization of `~/.codex/config.toml`. As one
might expect, every field is `Optional` and it is `#[derive(Deserialize,
Default)]`. Consistent use of `Optional` makes it clear what the user
has specified explicitly.
* `Config` is the "normalized config" and is produced by merging
`ConfigToml` with `ConfigOverrides`. Where `ConfigToml` contains a raw
`Option<Vec<SandboxPermission>>`, `Config` presents only the final
`SandboxPolicy`.
The changes to `core/src/exec.rs` and `core/src/linux.rs` merit extra
special attention to ensure we are faithfully mapping the
`SandboxPolicy` to the Seatbelt and Landlock configs, respectively.
Also, take note that `core/src/seatbelt_readonly_policy.sbpl` has been
renamed to `codex-rs/core/src/seatbelt_base_policy.sbpl` and that
`(allow file-read*)` has been removed from the `.sbpl` file as now this
is added to the policy in `core/src/exec.rs` when
`sandbox_policy.has_full_disk_read_access()` is `true`.
2025-04-29 15:01:16 -07:00
|
|
|
|
2025-04-27 21:47:50 -07:00
|
|
|
pub sandbox_policy: SandboxPolicy,
|
2025-04-28 15:39:34 -07:00
|
|
|
|
2025-10-16 11:23:38 -07:00
|
|
|
/// True if the user passed in an override or set a value in config.toml
|
|
|
|
|
/// for either of approval_policy or sandbox_mode.
|
|
|
|
|
pub did_user_set_custom_approval_policy_or_sandbox_mode: bool,
|
|
|
|
|
|
2025-10-27 18:19:32 -07:00
|
|
|
/// On Windows, indicates that a previously configured workspace-write sandbox
|
|
|
|
|
/// was coerced to read-only because native auto mode is unsupported.
|
|
|
|
|
pub forced_auto_mode_downgraded_on_windows: bool,
|
|
|
|
|
|
feat: introduce support for shell_environment_policy in config.toml (#1061)
To date, when handling `shell` and `local_shell` tool calls, we were
spawning new processes using the environment inherited from the Codex
process itself. This means that the sensitive `OPENAI_API_KEY` that
Codex needs to talk to OpenAI models was made available to everything
run by `shell` and `local_shell`. While there are cases where that might
be useful, it does not seem like a good default.
This PR introduces a complex `shell_environment_policy` config option to
control the `env` used with these tool calls. It is inevitably a bit
complex so that it is possible to override individual components of the
policy without having to restate the entire thing.
Details are in the updated `README.md` in this PR, but here is the
relevant bit that explains the individual fields of
`shell_environment_policy`:
| Field | Type | Default | Description |
| ------------------------- | -------------------------- | ------- |
-----------------------------------------------------------------------------------------------------------------------------------------------
|
| `inherit` | string | `core` | Starting template for the
environment:<br>`core` (`HOME`, `PATH`, `USER`, …), `all` (clone full
parent env), or `none` (start empty). |
| `ignore_default_excludes` | boolean | `false` | When `false`, Codex
removes any var whose **name** contains `KEY`, `SECRET`, or `TOKEN`
(case-insensitive) before other rules run. |
| `exclude` | array<string> | `[]` | Case-insensitive glob
patterns to drop after the default filter.<br>Examples: `"AWS_*"`,
`"AZURE_*"`. |
| `set` | table<string,string> | `{}` | Explicit key/value
overrides or additions – always win over inherited values. |
| `include_only` | array<string> | `[]` | If non-empty, a
whitelist of patterns; only variables that match _one_ pattern survive
the final step. (Generally used with `inherit = "all"`.) |
In particular, note that the default is `inherit = "core"`, so:
* if you have extra env variables that you want to inherit from the
parent process, use `inherit = "all"` and then specify `include_only`
* if you have extra env variables where you want to hardcode the values,
the default `inherit = "core"` will work fine, but then you need to
specify `set`
This configuration is not battle-tested, so we will probably still have
to play with it a bit. `core/src/exec_env.rs` has the critical business
logic as well as unit tests.
Though if nothing else, previous to this change:
```
$ cargo run --bin codex -- debug seatbelt -- printenv OPENAI_API_KEY
# ...prints OPENAI_API_KEY...
```
But after this change it does not print anything (as desired).
One final thing to call out about this PR is that the
`configure_command!` macro we use in `core/src/exec.rs` has to do some
complex logic with respect to how it builds up the `env` for the process
being spawned under Landlock/seccomp. Specifically, doing
`cmd.env_clear()` followed by `cmd.envs(&$env_map)` (which is arguably
the most intuitive way to do it) caused the Landlock unit tests to fail
because the processes spawned by the unit tests started failing in
unexpected ways! If we forgo `env_clear()` in favor of updating env vars
one at a time, the tests still pass. The comment in the code talks about
this a bit, and while I would like to investigate this more, I need to
move on for the moment, but I do plan to come back to it to fully
understand what is going on. For example, this suggests that we might
not be able to spawn a C program that calls `env_clear()`, which would
be...weird. We may still have to fiddle with our Landlock config if that
is the case.
2025-05-22 09:51:19 -07:00
|
|
|
pub shell_environment_policy: ShellEnvironmentPolicy,
|
|
|
|
|
|
2025-05-30 23:14:56 -07:00
|
|
|
/// When `true`, `AgentReasoning` events emitted by the backend will be
|
|
|
|
|
/// suppressed from the frontend output. This can reduce visual noise when
|
|
|
|
|
/// users are only interested in the final agent responses.
|
|
|
|
|
pub hide_agent_reasoning: bool,
|
|
|
|
|
|
2025-08-05 01:56:13 -07:00
|
|
|
/// When set to `true`, `AgentReasoningRawContentEvent` events will be shown in the UI/output.
|
|
|
|
|
/// Defaults to `false`.
|
|
|
|
|
pub show_raw_agent_reasoning: bool,
|
|
|
|
|
|
2025-08-06 11:48:03 -07:00
|
|
|
/// User-provided instructions from AGENTS.md.
|
2025-07-22 09:42:22 -07:00
|
|
|
pub user_instructions: Option<String>,
|
|
|
|
|
|
|
|
|
|
/// Base instructions override.
|
|
|
|
|
pub base_instructions: Option<String>,
|
feat: configurable notifications in the Rust CLI (#793)
With this change, you can specify a program that will be executed to get
notified about events generated by Codex. The notification info will be
packaged as a JSON object. The supported notification types are defined
by the `UserNotification` enum introduced in this PR. Initially, it
contains only one variant, `AgentTurnComplete`:
```rust
pub(crate) enum UserNotification {
#[serde(rename_all = "kebab-case")]
AgentTurnComplete {
turn_id: String,
/// Messages that the user sent to the agent to initiate the turn.
input_messages: Vec<String>,
/// The last message sent by the assistant in the turn.
last_assistant_message: Option<String>,
},
}
```
This is intended to support the common case when a "turn" ends, which
often means it is now your chance to give Codex further instructions.
For example, I have the following in my `~/.codex/config.toml`:
```toml
notify = ["python3", "/Users/mbolin/.codex/notify.py"]
```
I created my own custom notifier script that calls out to
[terminal-notifier](https://github.com/julienXX/terminal-notifier) to
show a desktop push notification on macOS. Contents of `notify.py`:
```python
#!/usr/bin/env python3
import json
import subprocess
import sys
def main() -> int:
if len(sys.argv) != 2:
print("Usage: notify.py <NOTIFICATION_JSON>")
return 1
try:
notification = json.loads(sys.argv[1])
except json.JSONDecodeError:
return 1
match notification_type := notification.get("type"):
case "agent-turn-complete":
assistant_message = notification.get("last-assistant-message")
if assistant_message:
title = f"Codex: {assistant_message}"
else:
title = "Codex: Turn Complete!"
input_messages = notification.get("input_messages", [])
message = " ".join(input_messages)
title += message
case _:
print(f"not sending a push notification for: {notification_type}")
return 0
subprocess.check_output(
[
"terminal-notifier",
"-title",
title,
"-message",
message,
"-group",
"codex",
"-ignoreDnD",
"-activate",
"com.googlecode.iterm2",
]
)
return 0
if __name__ == "__main__":
sys.exit(main())
```
For reference, here are related PRs that tried to add this functionality
to the TypeScript version of the Codex CLI:
* https://github.com/openai/codex/pull/160
* https://github.com/openai/codex/pull/498
2025-05-02 19:48:13 -07:00
|
|
|
|
|
|
|
|
/// Optional external notifier command. When set, Codex will spawn this
|
|
|
|
|
/// program after each completed *turn* (i.e. when the agent finishes
|
|
|
|
|
/// processing a user submission). The value must be the full command
|
|
|
|
|
/// broken into argv tokens **without** the trailing JSON argument - Codex
|
|
|
|
|
/// appends one extra argument containing a JSON payload describing the
|
|
|
|
|
/// event.
|
|
|
|
|
///
|
|
|
|
|
/// Example `~/.codex/config.toml` snippet:
|
|
|
|
|
///
|
|
|
|
|
/// ```toml
|
|
|
|
|
/// notify = ["notify-send", "Codex"]
|
|
|
|
|
/// ```
|
|
|
|
|
///
|
|
|
|
|
/// which will be invoked as:
|
|
|
|
|
///
|
|
|
|
|
/// ```shell
|
|
|
|
|
/// notify-send Codex '{"type":"agent-turn-complete","turn-id":"12345"}'
|
|
|
|
|
/// ```
|
|
|
|
|
///
|
|
|
|
|
/// If unset the feature is disabled.
|
|
|
|
|
pub notify: Option<Vec<String>>,
|
2025-05-04 10:57:12 -07:00
|
|
|
|
2025-09-15 10:22:02 -07:00
|
|
|
/// TUI notifications preference. When set, the TUI will send OSC 9 notifications on approvals
|
|
|
|
|
/// and turn completions when not focused.
|
|
|
|
|
pub tui_notifications: Notifications,
|
|
|
|
|
|
2025-05-04 10:57:12 -07:00
|
|
|
/// The directory that should be treated as the current working directory
|
|
|
|
|
/// for the session. All relative paths inside the business-logic layer are
|
|
|
|
|
/// resolved against this path.
|
|
|
|
|
pub cwd: PathBuf,
|
feat: support mcp_servers in config.toml (#829)
This adds initial support for MCP servers in the style of Claude Desktop
and Cursor. Note this PR is the bare minimum to get things working end
to end: all configured MCP servers are launched every time Codex is run,
there is no recovery for MCP servers that crash, etc.
(Also, I took some shortcuts to change some fields of `Session` to be
`pub(crate)`, which also means there are circular deps between
`codex.rs` and `mcp_tool_call.rs`, but I will clean that up in a
subsequent PR.)
`codex-rs/README.md` is updated as part of this PR to explain how to use
this feature. There is a bit of plumbing to route the new settings from
`Config` to the business logic in `codex.rs`. The most significant
chunks for new code are in `mcp_connection_manager.rs` (which defines
the `McpConnectionManager` struct) and `mcp_tool_call.rs`, which is
responsible for tool calls.
This PR also introduces new `McpToolCallBegin` and `McpToolCallEnd`
event types to the protocol, but does not add any handlers for them.
(See https://github.com/openai/codex/pull/836 for initial usage.)
To test, I added the following to my `~/.codex/config.toml`:
```toml
# Local build of https://github.com/hideya/mcp-server-weather-js
[mcp_servers.weather]
command = "/Users/mbolin/code/mcp-server-weather-js/dist/index.js"
args = []
```
And then I ran the following:
```
codex-rs$ cargo run --bin codex exec 'what is the weather in san francisco'
[2025-05-06T22:40:05] Task started: 1
[2025-05-06T22:40:18] Agent message: Here’s the latest National Weather Service forecast for San Francisco (downtown, near 37.77° N, 122.42° W):
This Afternoon (Tue):
• Sunny, high near 69 °F
• West-southwest wind around 12 mph
Tonight:
• Partly cloudy, low around 52 °F
• SW wind 7–10 mph
...
```
Note that Codex itself is not able to make network calls, so it would
not normally be able to get live weather information like this. However,
the weather MCP is [currently] not run under the Codex sandbox, so it is
able to hit `api.weather.gov` and fetch current weather information.
---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed
with [ReviewStack](https://reviewstack.dev/openai/codex/pull/829).
* #836
* __->__ #829
2025-05-06 15:47:59 -07:00
|
|
|
|
2025-10-27 19:41:49 -07:00
|
|
|
/// Preferred store for CLI auth credentials.
|
|
|
|
|
/// file (default): Use a file in the Codex home directory.
|
|
|
|
|
/// keyring: Use an OS-specific keyring service.
|
|
|
|
|
/// auto: Use the OS-specific keyring service if available, otherwise use a file.
|
|
|
|
|
pub cli_auth_credentials_store_mode: AuthCredentialsStoreMode,
|
|
|
|
|
|
feat: support mcp_servers in config.toml (#829)
This adds initial support for MCP servers in the style of Claude Desktop
and Cursor. Note this PR is the bare minimum to get things working end
to end: all configured MCP servers are launched every time Codex is run,
there is no recovery for MCP servers that crash, etc.
(Also, I took some shortcuts to change some fields of `Session` to be
`pub(crate)`, which also means there are circular deps between
`codex.rs` and `mcp_tool_call.rs`, but I will clean that up in a
subsequent PR.)
`codex-rs/README.md` is updated as part of this PR to explain how to use
this feature. There is a bit of plumbing to route the new settings from
`Config` to the business logic in `codex.rs`. The most significant
chunks for new code are in `mcp_connection_manager.rs` (which defines
the `McpConnectionManager` struct) and `mcp_tool_call.rs`, which is
responsible for tool calls.
This PR also introduces new `McpToolCallBegin` and `McpToolCallEnd`
event types to the protocol, but does not add any handlers for them.
(See https://github.com/openai/codex/pull/836 for initial usage.)
To test, I added the following to my `~/.codex/config.toml`:
```toml
# Local build of https://github.com/hideya/mcp-server-weather-js
[mcp_servers.weather]
command = "/Users/mbolin/code/mcp-server-weather-js/dist/index.js"
args = []
```
And then I ran the following:
```
codex-rs$ cargo run --bin codex exec 'what is the weather in san francisco'
[2025-05-06T22:40:05] Task started: 1
[2025-05-06T22:40:18] Agent message: Here’s the latest National Weather Service forecast for San Francisco (downtown, near 37.77° N, 122.42° W):
This Afternoon (Tue):
• Sunny, high near 69 °F
• West-southwest wind around 12 mph
Tonight:
• Partly cloudy, low around 52 °F
• SW wind 7–10 mph
...
```
Note that Codex itself is not able to make network calls, so it would
not normally be able to get live weather information like this. However,
the weather MCP is [currently] not run under the Codex sandbox, so it is
able to hit `api.weather.gov` and fetch current weather information.
---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed
with [ReviewStack](https://reviewstack.dev/openai/codex/pull/829).
* #836
* __->__ #829
2025-05-06 15:47:59 -07:00
|
|
|
/// Definition for MCP servers that Codex can reach out to for tool calls.
|
|
|
|
|
pub mcp_servers: HashMap<String, McpServerConfig>,
|
2025-05-07 17:38:28 -07:00
|
|
|
|
2025-10-07 19:39:32 -07:00
|
|
|
/// Preferred store for MCP OAuth credentials.
|
|
|
|
|
/// keyring: Use an OS-specific keyring service.
|
|
|
|
|
/// Credentials stored in the keyring will only be readable by Codex unless the user explicitly grants access via OS-level keyring access.
|
|
|
|
|
/// https://github.com/openai/codex/blob/main/codex-rs/rmcp-client/src/oauth.rs#L2
|
|
|
|
|
/// file: CODEX_HOME/.credentials.json
|
|
|
|
|
/// This file will be readable to Codex and other applications running as the same user.
|
|
|
|
|
/// auto (default): keyring if available, otherwise file.
|
|
|
|
|
pub mcp_oauth_credentials_store_mode: OAuthCredentialsStoreMode,
|
|
|
|
|
|
2025-05-07 17:38:28 -07:00
|
|
|
/// Combined provider map (defaults merged with user-defined overrides).
|
|
|
|
|
pub model_providers: HashMap<String, ModelProviderInfo>,
|
2025-05-10 17:52:59 -07:00
|
|
|
|
|
|
|
|
/// Maximum number of bytes to include from an AGENTS.md project doc file.
|
|
|
|
|
pub project_doc_max_bytes: usize,
|
2025-05-15 00:30:13 -07:00
|
|
|
|
2025-10-01 11:19:59 -07:00
|
|
|
/// Additional filenames to try when looking for project-level docs.
|
|
|
|
|
pub project_doc_fallback_filenames: Vec<String>,
|
|
|
|
|
|
2025-05-15 00:30:13 -07:00
|
|
|
/// Directory containing all Codex state (defaults to `~/.codex` but can be
|
|
|
|
|
/// overridden by the `CODEX_HOME` environment variable).
|
|
|
|
|
pub codex_home: PathBuf,
|
feat: record messages from user in ~/.codex/history.jsonl (#939)
This is a large change to support a "history" feature like you would
expect in a shell like Bash.
History events are recorded in `$CODEX_HOME/history.jsonl`. Because it
is a JSONL file, it is straightforward to append new entries (as opposed
to the TypeScript file that uses `$CODEX_HOME/history.json`, so to be
valid JSON, each new entry entails rewriting the entire file). Because
it is possible for there to be multiple instances of Codex CLI writing
to `history.jsonl` at once, we use advisory file locking when working
with `history.jsonl` in `codex-rs/core/src/message_history.rs`.
Because we believe history is a sufficiently useful feature, we enable
it by default. Though to provide some safety, we set the file
permissions of `history.jsonl` to be `o600` so that other users on the
system cannot read the user's history. We do not yet support a default
list of `SENSITIVE_PATTERNS` as the TypeScript CLI does:
https://github.com/openai/codex/blob/3fdf9df1335ac9501e3fb0e61715359145711e8b/codex-cli/src/utils/storage/command-history.ts#L10-L17
We are going to take a more conservative approach to this list in the
Rust CLI. For example, while `/\b[A-Za-z0-9-_]{20,}\b/` might exclude
sensitive information like API tokens, it would also exclude valuable
information such as references to Git commits.
As noted in the updated documentation, users can opt-out of history by
adding the following to `config.toml`:
```toml
[history]
persistence = "none"
```
Because `history.jsonl` could, in theory, be quite large, we take a[n
arguably overly pedantic] approach in reading history entries into
memory. Specifically, we start by telling the client the current number
of entries in the history file (`history_entry_count`) as well as the
inode (`history_log_id`) of `history.jsonl` (see the new fields on
`SessionConfiguredEvent`).
The client is responsible for keeping new entries in memory to create a
"local history," but if the user hits up enough times to go "past" the
end of local history, then the client should use the new
`GetHistoryEntryRequest` in the protocol to fetch older entries.
Specifically, it should pass the `history_log_id` it was given
originally and work backwards from `history_entry_count`. (It should
really fetch history in batches rather than one-at-a-time, but that is
something we can improve upon in subsequent PRs.)
The motivation behind this crazy scheme is that it is designed to defend
against:
* The `history.jsonl` being truncated during the session such that the
index into the history is no longer consistent with what had been read
up to that point. We do not yet have logic to enforce a `max_bytes` for
`history.jsonl`, but once we do, we will aspire to implement it in a way
that should result in a new inode for the file on most systems.
* New items from concurrent Codex CLI sessions amending to the history.
Because, in absence of truncation, `history.jsonl` is an append-only
log, so long as the client reads backwards from `history_entry_count`,
it should always get a consistent view of history. (That said, it will
not be able to read _new_ commands from concurrent sessions, but perhaps
we will introduce a `/` command to reload latest history or something
down the road.)
Admittedly, my testing of this feature thus far has been fairly light. I
expect we will find bugs and introduce enhancements/fixes going forward.
2025-05-15 16:26:23 -07:00
|
|
|
|
|
|
|
|
/// Settings that govern if and what will be written to `~/.codex/history.jsonl`.
|
|
|
|
|
pub history: History,
|
2025-05-16 11:33:08 -07:00
|
|
|
|
|
|
|
|
/// Optional URI-based file opener. If set, citations to files in the model
|
|
|
|
|
/// output will be hyperlinked using the specified URI scheme.
|
|
|
|
|
pub file_opener: UriBasedFileOpener,
|
2025-05-16 16:16:50 -07:00
|
|
|
|
2025-05-22 21:52:28 -07:00
|
|
|
/// Path to the `codex-linux-sandbox` executable. This must be set if
|
|
|
|
|
/// [`crate::exec::SandboxType::LinuxSeccomp`] is used. Note that this
|
|
|
|
|
/// cannot be set in the config file: it must be set in code via
|
|
|
|
|
/// [`ConfigOverrides`].
|
|
|
|
|
///
|
|
|
|
|
/// When this program is invoked, arg0 will be set to `codex-linux-sandbox`.
|
|
|
|
|
pub codex_linux_sandbox_exe: Option<PathBuf>,
|
feat: make reasoning effort/summaries configurable (#1199)
Previous to this PR, we always set `reasoning` when making a request
using the Responses API:
https://github.com/openai/codex/blob/d7245cbbc9d8ff5446da45e5951761103492476d/codex-rs/core/src/client.rs#L108-L111
Though if you tried to use the Rust CLI with `--model gpt-4.1`, this
would fail with:
```shell
"Unsupported parameter: 'reasoning.effort' is not supported with this model."
```
We take a cue from the TypeScript CLI, which does a check on the model
name:
https://github.com/openai/codex/blob/d7245cbbc9d8ff5446da45e5951761103492476d/codex-cli/src/utils/agent/agent-loop.ts#L786-L789
This PR does a similar check, though also adds support for the following
config options:
```
model_reasoning_effort = "low" | "medium" | "high" | "none"
model_reasoning_summary = "auto" | "concise" | "detailed" | "none"
```
This way, if you have a model whose name happens to start with `"o"` (or
`"codex"`?), you can set these to `"none"` to explicitly disable
reasoning, if necessary. (That said, it seems unlikely anyone would use
the Responses API with non-OpenAI models, but we provide an escape
hatch, anyway.)
This PR also updates both the TUI and `codex exec` to show `reasoning
effort` and `reasoning summaries` in the header.
2025-06-02 16:01:34 -07:00
|
|
|
|
2025-08-19 10:55:07 -07:00
|
|
|
/// Value to use for `reasoning.effort` when making a request using the
|
|
|
|
|
/// Responses API.
|
2025-09-12 12:06:33 -07:00
|
|
|
pub model_reasoning_effort: Option<ReasoningEffort>,
|
feat: make reasoning effort/summaries configurable (#1199)
Previous to this PR, we always set `reasoning` when making a request
using the Responses API:
https://github.com/openai/codex/blob/d7245cbbc9d8ff5446da45e5951761103492476d/codex-rs/core/src/client.rs#L108-L111
Though if you tried to use the Rust CLI with `--model gpt-4.1`, this
would fail with:
```shell
"Unsupported parameter: 'reasoning.effort' is not supported with this model."
```
We take a cue from the TypeScript CLI, which does a check on the model
name:
https://github.com/openai/codex/blob/d7245cbbc9d8ff5446da45e5951761103492476d/codex-cli/src/utils/agent/agent-loop.ts#L786-L789
This PR does a similar check, though also adds support for the following
config options:
```
model_reasoning_effort = "low" | "medium" | "high" | "none"
model_reasoning_summary = "auto" | "concise" | "detailed" | "none"
```
This way, if you have a model whose name happens to start with `"o"` (or
`"codex"`?), you can set these to `"none"` to explicitly disable
reasoning, if necessary. (That said, it seems unlikely anyone would use
the Responses API with non-OpenAI models, but we provide an escape
hatch, anyway.)
This PR also updates both the TUI and `codex exec` to show `reasoning
effort` and `reasoning summaries` in the header.
2025-06-02 16:01:34 -07:00
|
|
|
|
|
|
|
|
/// If not "none", the value to use for `reasoning.summary` when making a
|
|
|
|
|
/// request using the Responses API.
|
|
|
|
|
pub model_reasoning_summary: ReasoningSummary,
|
2025-07-10 14:30:33 -07:00
|
|
|
|
2025-08-22 17:12:10 +01:00
|
|
|
/// Optional verbosity control for GPT-5 models (Responses API `text.verbosity`).
|
|
|
|
|
pub model_verbosity: Option<Verbosity>,
|
|
|
|
|
|
2025-07-11 13:30:11 -04:00
|
|
|
/// Base URL for requests to ChatGPT (as opposed to the OpenAI API).
|
|
|
|
|
pub chatgpt_base_url: String,
|
2025-07-18 17:04:04 -07:00
|
|
|
|
2025-10-20 08:50:54 -07:00
|
|
|
/// When set, restricts ChatGPT login to a specific workspace identifier.
|
|
|
|
|
pub forced_chatgpt_workspace_id: Option<String>,
|
|
|
|
|
|
|
|
|
|
/// When set, restricts the login mechanism users may use.
|
|
|
|
|
pub forced_login_method: Option<ForcedLoginMethod>,
|
|
|
|
|
|
2025-08-15 11:55:53 -04:00
|
|
|
/// Include the `apply_patch` tool for models that benefit from invoking
|
|
|
|
|
/// file edits as a structured tool call. When unset, this falls back to the
|
|
|
|
|
/// model family's default preference.
|
|
|
|
|
pub include_apply_patch_tool: bool,
|
|
|
|
|
|
2025-08-23 22:58:56 -07:00
|
|
|
pub tools_web_search_request: bool,
|
|
|
|
|
|
2025-10-24 17:23:44 -05:00
|
|
|
/// When `true`, run a model-based assessment for commands denied by the sandbox.
|
|
|
|
|
pub experimental_sandbox_command_assessment: bool,
|
|
|
|
|
|
2025-08-22 18:10:55 -07:00
|
|
|
pub use_experimental_streamable_shell_tool: bool,
|
2025-08-27 17:41:23 -07:00
|
|
|
|
Unified execution (#3288)
## Unified PTY-Based Exec Tool
Note: this requires to have this flag in the config:
`use_experimental_unified_exec_tool=true`
- Adds a PTY-backed interactive exec feature (“unified_exec”) with
session reuse via
session_id, bounded output (128 KiB), and timeout clamping (≤ 60 s).
- Protocol: introduces ResponseItem::UnifiedExec { session_id,
arguments, timeout_ms }.
- Tools: exposes unified_exec as a function tool (Responses API);
excluded from Chat
Completions payload while still supported in tool lists.
- Path handling: resolves commands via PATH (or explicit paths), with
UTF‑8/newline‑aware
truncation (truncate_middle).
- Tests: cover command parsing, path resolution, session
persistence/cleanup, multi‑session
isolation, timeouts, and truncation behavior.
2025-09-10 17:38:11 -07:00
|
|
|
/// If set to `true`, use only the experimental unified exec tool.
|
|
|
|
|
pub use_experimental_unified_exec_tool: bool,
|
|
|
|
|
|
2025-09-26 10:13:37 -07:00
|
|
|
/// If set to `true`, use the experimental official Rust MCP client.
|
|
|
|
|
/// https://github.com/modelcontextprotocol/rust-sdk
|
|
|
|
|
pub use_experimental_use_rmcp_client: bool,
|
|
|
|
|
|
2025-08-27 17:41:23 -07:00
|
|
|
/// Include the `view_image` tool that lets the agent attach a local image path to context.
|
|
|
|
|
pub include_view_image_tool: bool,
|
2025-09-10 13:53:46 -07:00
|
|
|
|
2025-10-14 18:50:00 +01:00
|
|
|
/// Centralized feature flags; source of truth for feature gating.
|
|
|
|
|
pub features: Features,
|
|
|
|
|
|
2025-09-10 13:53:46 -07:00
|
|
|
/// The active profile name used to derive this `Config` (if any).
|
|
|
|
|
pub active_profile: Option<String>,
|
|
|
|
|
|
2025-10-16 11:23:38 -07:00
|
|
|
/// The currently active project config, resolved by checking if cwd:
|
|
|
|
|
/// is (1) part of a git repo, (2) a git worktree, or (3) just using the cwd
|
|
|
|
|
pub active_project: ProjectConfig,
|
|
|
|
|
|
2025-10-04 17:41:40 -07:00
|
|
|
/// Tracks whether the Windows onboarding screen has been acknowledged.
|
|
|
|
|
pub windows_wsl_setup_acknowledged: bool,
|
|
|
|
|
|
2025-10-16 17:31:46 -07:00
|
|
|
/// Collection of various notices we show the user
|
|
|
|
|
pub notices: Notice,
|
|
|
|
|
|
2025-08-28 12:54:12 -07:00
|
|
|
/// When true, disables burst-paste detection for typed input entirely.
|
|
|
|
|
/// All characters are inserted as they are received, and no buffering
|
|
|
|
|
/// or placeholder replacement will occur for fast keypress bursts.
|
|
|
|
|
pub disable_paste_burst: bool,
|
OpenTelemetry events (#2103)
### Title
## otel
Codex can emit [OpenTelemetry](https://opentelemetry.io/) **log events**
that
describe each run: outbound API requests, streamed responses, user
input,
tool-approval decisions, and the result of every tool invocation. Export
is
**disabled by default** so local runs remain self-contained. Opt in by
adding an
`[otel]` table and choosing an exporter.
```toml
[otel]
environment = "staging" # defaults to "dev"
exporter = "none" # defaults to "none"; set to otlp-http or otlp-grpc to send events
log_user_prompt = false # defaults to false; redact prompt text unless explicitly enabled
```
Codex tags every exported event with `service.name = "codex-cli"`, the
CLI
version, and an `env` attribute so downstream collectors can distinguish
dev/staging/prod traffic. Only telemetry produced inside the
`codex_otel`
crate—the events listed below—is forwarded to the exporter.
### Event catalog
Every event shares a common set of metadata fields: `event.timestamp`,
`conversation.id`, `app.version`, `auth_mode` (when available),
`user.account_id` (when available), `terminal.type`, `model`, and
`slug`.
With OTEL enabled Codex emits the following event types (in addition to
the
metadata above):
- `codex.api_request`
- `cf_ray` (optional)
- `attempt`
- `duration_ms`
- `http.response.status_code` (optional)
- `error.message` (failures)
- `codex.sse_event`
- `event.kind`
- `duration_ms`
- `error.message` (failures)
- `input_token_count` (completion only)
- `output_token_count` (completion only)
- `cached_token_count` (completion only, optional)
- `reasoning_token_count` (completion only, optional)
- `tool_token_count` (completion only)
- `codex.user_prompt`
- `prompt_length`
- `prompt` (redacted unless `log_user_prompt = true`)
- `codex.tool_decision`
- `tool_name`
- `call_id`
- `decision` (`approved`, `approved_for_session`, `denied`, or `abort`)
- `source` (`config` or `user`)
- `codex.tool_result`
- `tool_name`
- `call_id`
- `arguments`
- `duration_ms` (execution time for the tool)
- `success` (`"true"` or `"false"`)
- `output`
### Choosing an exporter
Set `otel.exporter` to control where events go:
- `none` – leaves instrumentation active but skips exporting. This is
the
default.
- `otlp-http` – posts OTLP log records to an OTLP/HTTP collector.
Specify the
endpoint, protocol, and headers your collector expects:
```toml
[otel]
exporter = { otlp-http = {
endpoint = "https://otel.example.com/v1/logs",
protocol = "binary",
headers = { "x-otlp-api-key" = "${OTLP_TOKEN}" }
}}
```
- `otlp-grpc` – streams OTLP log records over gRPC. Provide the endpoint
and any
metadata headers:
```toml
[otel]
exporter = { otlp-grpc = {
endpoint = "https://otel.example.com:4317",
headers = { "x-otlp-meta" = "abc123" }
}}
```
If the exporter is `none` nothing is written anywhere; otherwise you
must run or point to your
own collector. All exporters run on a background batch worker that is
flushed on
shutdown.
If you build Codex from source the OTEL crate is still behind an `otel`
feature
flag; the official prebuilt binaries ship with the feature enabled. When
the
feature is disabled the telemetry hooks become no-ops so the CLI
continues to
function without the extra dependencies.
---------
Co-authored-by: Anton Panasenko <apanasenko@openai.com>
2025-09-29 19:30:55 +01:00
|
|
|
|
|
|
|
|
/// OTEL configuration (exporter type, endpoint, headers, etc.).
|
2025-10-30 10:28:32 +00:00
|
|
|
pub otel: crate::config::types::OtelConfig,
|
feat: record messages from user in ~/.codex/history.jsonl (#939)
This is a large change to support a "history" feature like you would
expect in a shell like Bash.
History events are recorded in `$CODEX_HOME/history.jsonl`. Because it
is a JSONL file, it is straightforward to append new entries (as opposed
to the TypeScript file that uses `$CODEX_HOME/history.json`, so to be
valid JSON, each new entry entails rewriting the entire file). Because
it is possible for there to be multiple instances of Codex CLI writing
to `history.jsonl` at once, we use advisory file locking when working
with `history.jsonl` in `codex-rs/core/src/message_history.rs`.
Because we believe history is a sufficiently useful feature, we enable
it by default. Though to provide some safety, we set the file
permissions of `history.jsonl` to be `o600` so that other users on the
system cannot read the user's history. We do not yet support a default
list of `SENSITIVE_PATTERNS` as the TypeScript CLI does:
https://github.com/openai/codex/blob/3fdf9df1335ac9501e3fb0e61715359145711e8b/codex-cli/src/utils/storage/command-history.ts#L10-L17
We are going to take a more conservative approach to this list in the
Rust CLI. For example, while `/\b[A-Za-z0-9-_]{20,}\b/` might exclude
sensitive information like API tokens, it would also exclude valuable
information such as references to Git commits.
As noted in the updated documentation, users can opt-out of history by
adding the following to `config.toml`:
```toml
[history]
persistence = "none"
```
Because `history.jsonl` could, in theory, be quite large, we take a[n
arguably overly pedantic] approach in reading history entries into
memory. Specifically, we start by telling the client the current number
of entries in the history file (`history_entry_count`) as well as the
inode (`history_log_id`) of `history.jsonl` (see the new fields on
`SessionConfiguredEvent`).
The client is responsible for keeping new entries in memory to create a
"local history," but if the user hits up enough times to go "past" the
end of local history, then the client should use the new
`GetHistoryEntryRequest` in the protocol to fetch older entries.
Specifically, it should pass the `history_log_id` it was given
originally and work backwards from `history_entry_count`. (It should
really fetch history in batches rather than one-at-a-time, but that is
something we can improve upon in subsequent PRs.)
The motivation behind this crazy scheme is that it is designed to defend
against:
* The `history.jsonl` being truncated during the session such that the
index into the history is no longer consistent with what had been read
up to that point. We do not yet have logic to enforce a `max_bytes` for
`history.jsonl`, but once we do, we will aspire to implement it in a way
that should result in a new inode for the file on most systems.
* New items from concurrent Codex CLI sessions amending to the history.
Because, in absence of truncation, `history.jsonl` is an append-only
log, so long as the client reads backwards from `history_entry_count`,
it should always get a consistent view of history. (That said, it will
not be able to read _new_ commands from concurrent sessions, but perhaps
we will introduce a `/` command to reload latest history or something
down the road.)
Admittedly, my testing of this feature thus far has been fairly light. I
expect we will find bugs and introduce enhancements/fixes going forward.
2025-05-15 16:26:23 -07:00
|
|
|
}
|
|
|
|
|
|
feat: add support for -c/--config to override individual config items (#1137)
This PR introduces support for `-c`/`--config` so users can override
individual config values on the command line using `--config
name=value`. Example:
```
codex --config model=o4-mini
```
Making it possible to set arbitrary config values on the command line
results in a more flexible configuration scheme and makes it easier to
provide single-line examples that can be copy-pasted from documentation.
Effectively, it means there are four levels of configuration for some
values:
- Default value (e.g., `model` currently defaults to `o4-mini`)
- Value in `config.toml` (e.g., user could override the default to be
`model = "o3"` in their `config.toml`)
- Specifying `-c` or `--config` to override `model` (e.g., user can
include `-c model=o3` in their list of args to Codex)
- If available, a config-specific flag can be used, which takes
precedence over `-c` (e.g., user can specify `--model o3` in their list
of args to Codex)
Now that it is possible to specify anything that could be configured in
`config.toml` on the command line using `-c`, we do not need to have a
custom flag for every possible config option (which can clutter the
output of `--help`). To that end, as part of this PR, we drop support
for the `--disable-response-storage` flag, as users can now specify `-c
disable_response_storage=true` to get the equivalent functionality.
Under the hood, this works by loading the `config.toml` into a
`toml::Value`. Then for each `key=value`, we create a small synthetic
TOML file with `value` so that we can run the TOML parser to get the
equivalent `toml::Value`. We then parse `key` to determine the point in
the original `toml::Value` to do the insert/replace. Once all of the
overrides from `-c` args have been applied, the `toml::Value` is
deserialized into a `ConfigToml` and then the `ConfigOverrides` are
applied, as before.
2025-05-27 23:11:44 -07:00
|
|
|
impl Config {
|
2025-10-03 13:02:26 -07:00
|
|
|
pub async fn load_with_cli_overrides(
|
feat: add support for -c/--config to override individual config items (#1137)
This PR introduces support for `-c`/`--config` so users can override
individual config values on the command line using `--config
name=value`. Example:
```
codex --config model=o4-mini
```
Making it possible to set arbitrary config values on the command line
results in a more flexible configuration scheme and makes it easier to
provide single-line examples that can be copy-pasted from documentation.
Effectively, it means there are four levels of configuration for some
values:
- Default value (e.g., `model` currently defaults to `o4-mini`)
- Value in `config.toml` (e.g., user could override the default to be
`model = "o3"` in their `config.toml`)
- Specifying `-c` or `--config` to override `model` (e.g., user can
include `-c model=o3` in their list of args to Codex)
- If available, a config-specific flag can be used, which takes
precedence over `-c` (e.g., user can specify `--model o3` in their list
of args to Codex)
Now that it is possible to specify anything that could be configured in
`config.toml` on the command line using `-c`, we do not need to have a
custom flag for every possible config option (which can clutter the
output of `--help`). To that end, as part of this PR, we drop support
for the `--disable-response-storage` flag, as users can now specify `-c
disable_response_storage=true` to get the equivalent functionality.
Under the hood, this works by loading the `config.toml` into a
`toml::Value`. Then for each `key=value`, we create a small synthetic
TOML file with `value` so that we can run the TOML parser to get the
equivalent `toml::Value`. We then parse `key` to determine the point in
the original `toml::Value` to do the insert/replace. Once all of the
overrides from `-c` args have been applied, the `toml::Value` is
deserialized into a `ConfigToml` and then the `ConfigOverrides` are
applied, as before.
2025-05-27 23:11:44 -07:00
|
|
|
cli_overrides: Vec<(String, TomlValue)>,
|
|
|
|
|
overrides: ConfigOverrides,
|
|
|
|
|
) -> std::io::Result<Self> {
|
|
|
|
|
let codex_home = find_codex_home()?;
|
|
|
|
|
|
2025-10-03 13:02:26 -07:00
|
|
|
let root_value = load_resolved_config(
|
|
|
|
|
&codex_home,
|
|
|
|
|
cli_overrides,
|
|
|
|
|
crate::config_loader::LoaderOverrides::default(),
|
|
|
|
|
)
|
|
|
|
|
.await?;
|
feat: add support for -c/--config to override individual config items (#1137)
This PR introduces support for `-c`/`--config` so users can override
individual config values on the command line using `--config
name=value`. Example:
```
codex --config model=o4-mini
```
Making it possible to set arbitrary config values on the command line
results in a more flexible configuration scheme and makes it easier to
provide single-line examples that can be copy-pasted from documentation.
Effectively, it means there are four levels of configuration for some
values:
- Default value (e.g., `model` currently defaults to `o4-mini`)
- Value in `config.toml` (e.g., user could override the default to be
`model = "o3"` in their `config.toml`)
- Specifying `-c` or `--config` to override `model` (e.g., user can
include `-c model=o3` in their list of args to Codex)
- If available, a config-specific flag can be used, which takes
precedence over `-c` (e.g., user can specify `--model o3` in their list
of args to Codex)
Now that it is possible to specify anything that could be configured in
`config.toml` on the command line using `-c`, we do not need to have a
custom flag for every possible config option (which can clutter the
output of `--help`). To that end, as part of this PR, we drop support
for the `--disable-response-storage` flag, as users can now specify `-c
disable_response_storage=true` to get the equivalent functionality.
Under the hood, this works by loading the `config.toml` into a
`toml::Value`. Then for each `key=value`, we create a small synthetic
TOML file with `value` so that we can run the TOML parser to get the
equivalent `toml::Value`. We then parse `key` to determine the point in
the original `toml::Value` to do the insert/replace. Once all of the
overrides from `-c` args have been applied, the `toml::Value` is
deserialized into a `ConfigToml` and then the `ConfigOverrides` are
applied, as before.
2025-05-27 23:11:44 -07:00
|
|
|
|
|
|
|
|
let cfg: ConfigToml = root_value.try_into().map_err(|e| {
|
|
|
|
|
tracing::error!("Failed to deserialize overridden config: {e}");
|
|
|
|
|
std::io::Error::new(std::io::ErrorKind::InvalidData, e)
|
|
|
|
|
})?;
|
|
|
|
|
|
|
|
|
|
Self::load_from_base_config_with_overrides(cfg, overrides, codex_home)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2025-10-03 13:02:26 -07:00
|
|
|
pub async fn load_config_as_toml_with_cli_overrides(
|
2025-08-07 09:27:38 -07:00
|
|
|
codex_home: &Path,
|
|
|
|
|
cli_overrides: Vec<(String, TomlValue)>,
|
|
|
|
|
) -> std::io::Result<ConfigToml> {
|
2025-10-03 13:02:26 -07:00
|
|
|
let root_value = load_resolved_config(
|
|
|
|
|
codex_home,
|
|
|
|
|
cli_overrides,
|
|
|
|
|
crate::config_loader::LoaderOverrides::default(),
|
|
|
|
|
)
|
|
|
|
|
.await?;
|
2025-08-07 09:27:38 -07:00
|
|
|
|
|
|
|
|
let cfg: ConfigToml = root_value.try_into().map_err(|e| {
|
|
|
|
|
tracing::error!("Failed to deserialize overridden config: {e}");
|
|
|
|
|
std::io::Error::new(std::io::ErrorKind::InvalidData, e)
|
|
|
|
|
})?;
|
|
|
|
|
|
|
|
|
|
Ok(cfg)
|
|
|
|
|
}
|
|
|
|
|
|
2025-10-03 13:02:26 -07:00
|
|
|
async fn load_resolved_config(
|
|
|
|
|
codex_home: &Path,
|
|
|
|
|
cli_overrides: Vec<(String, TomlValue)>,
|
|
|
|
|
overrides: crate::config_loader::LoaderOverrides,
|
|
|
|
|
) -> std::io::Result<TomlValue> {
|
|
|
|
|
let layers = load_config_layers_with_overrides(codex_home, overrides).await?;
|
|
|
|
|
Ok(apply_overlays(layers, cli_overrides))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn apply_overlays(
|
|
|
|
|
layers: LoadedConfigLayers,
|
|
|
|
|
cli_overrides: Vec<(String, TomlValue)>,
|
|
|
|
|
) -> TomlValue {
|
|
|
|
|
let LoadedConfigLayers {
|
|
|
|
|
mut base,
|
|
|
|
|
managed_config,
|
|
|
|
|
managed_preferences,
|
|
|
|
|
} = layers;
|
|
|
|
|
|
|
|
|
|
for (path, value) in cli_overrides.into_iter() {
|
|
|
|
|
apply_toml_override(&mut base, &path, value);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for overlay in [managed_config, managed_preferences].into_iter().flatten() {
|
|
|
|
|
merge_toml_values(&mut base, &overlay);
|
feat: add support for -c/--config to override individual config items (#1137)
This PR introduces support for `-c`/`--config` so users can override
individual config values on the command line using `--config
name=value`. Example:
```
codex --config model=o4-mini
```
Making it possible to set arbitrary config values on the command line
results in a more flexible configuration scheme and makes it easier to
provide single-line examples that can be copy-pasted from documentation.
Effectively, it means there are four levels of configuration for some
values:
- Default value (e.g., `model` currently defaults to `o4-mini`)
- Value in `config.toml` (e.g., user could override the default to be
`model = "o3"` in their `config.toml`)
- Specifying `-c` or `--config` to override `model` (e.g., user can
include `-c model=o3` in their list of args to Codex)
- If available, a config-specific flag can be used, which takes
precedence over `-c` (e.g., user can specify `--model o3` in their list
of args to Codex)
Now that it is possible to specify anything that could be configured in
`config.toml` on the command line using `-c`, we do not need to have a
custom flag for every possible config option (which can clutter the
output of `--help`). To that end, as part of this PR, we drop support
for the `--disable-response-storage` flag, as users can now specify `-c
disable_response_storage=true` to get the equivalent functionality.
Under the hood, this works by loading the `config.toml` into a
`toml::Value`. Then for each `key=value`, we create a small synthetic
TOML file with `value` so that we can run the TOML parser to get the
equivalent `toml::Value`. We then parse `key` to determine the point in
the original `toml::Value` to do the insert/replace. Once all of the
overrides from `-c` args have been applied, the `toml::Value` is
deserialized into a `ConfigToml` and then the `ConfigOverrides` are
applied, as before.
2025-05-27 23:11:44 -07:00
|
|
|
}
|
2025-10-03 13:02:26 -07:00
|
|
|
|
|
|
|
|
base
|
feat: add support for -c/--config to override individual config items (#1137)
This PR introduces support for `-c`/`--config` so users can override
individual config values on the command line using `--config
name=value`. Example:
```
codex --config model=o4-mini
```
Making it possible to set arbitrary config values on the command line
results in a more flexible configuration scheme and makes it easier to
provide single-line examples that can be copy-pasted from documentation.
Effectively, it means there are four levels of configuration for some
values:
- Default value (e.g., `model` currently defaults to `o4-mini`)
- Value in `config.toml` (e.g., user could override the default to be
`model = "o3"` in their `config.toml`)
- Specifying `-c` or `--config` to override `model` (e.g., user can
include `-c model=o3` in their list of args to Codex)
- If available, a config-specific flag can be used, which takes
precedence over `-c` (e.g., user can specify `--model o3` in their list
of args to Codex)
Now that it is possible to specify anything that could be configured in
`config.toml` on the command line using `-c`, we do not need to have a
custom flag for every possible config option (which can clutter the
output of `--help`). To that end, as part of this PR, we drop support
for the `--disable-response-storage` flag, as users can now specify `-c
disable_response_storage=true` to get the equivalent functionality.
Under the hood, this works by loading the `config.toml` into a
`toml::Value`. Then for each `key=value`, we create a small synthetic
TOML file with `value` so that we can run the TOML parser to get the
equivalent `toml::Value`. We then parse `key` to determine the point in
the original `toml::Value` to do the insert/replace. Once all of the
overrides from `-c` args have been applied, the `toml::Value` is
deserialized into a `ConfigToml` and then the `ConfigOverrides` are
applied, as before.
2025-05-27 23:11:44 -07:00
|
|
|
}
|
|
|
|
|
|
2025-10-03 13:02:26 -07:00
|
|
|
pub async fn load_global_mcp_servers(
|
2025-09-14 21:30:56 -07:00
|
|
|
codex_home: &Path,
|
|
|
|
|
) -> std::io::Result<BTreeMap<String, McpServerConfig>> {
|
2025-10-03 13:02:26 -07:00
|
|
|
let root_value = load_config_as_toml(codex_home).await?;
|
2025-09-14 21:30:56 -07:00
|
|
|
let Some(servers_value) = root_value.get("mcp_servers") else {
|
|
|
|
|
return Ok(BTreeMap::new());
|
|
|
|
|
};
|
|
|
|
|
|
2025-10-07 20:21:37 -07:00
|
|
|
ensure_no_inline_bearer_tokens(servers_value)?;
|
|
|
|
|
|
2025-09-14 21:30:56 -07:00
|
|
|
servers_value
|
|
|
|
|
.clone()
|
|
|
|
|
.try_into()
|
|
|
|
|
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
|
|
|
|
|
}
|
|
|
|
|
|
2025-10-07 20:21:37 -07:00
|
|
|
/// We briefly allowed plain text bearer_token fields in MCP server configs.
|
|
|
|
|
/// We want to warn people who recently added these fields but can remove this after a few months.
|
|
|
|
|
fn ensure_no_inline_bearer_tokens(value: &TomlValue) -> std::io::Result<()> {
|
|
|
|
|
let Some(servers_table) = value.as_table() else {
|
|
|
|
|
return Ok(());
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
for (server_name, server_value) in servers_table {
|
|
|
|
|
if let Some(server_table) = server_value.as_table()
|
|
|
|
|
&& server_table.contains_key("bearer_token")
|
|
|
|
|
{
|
|
|
|
|
let message = format!(
|
|
|
|
|
"mcp_servers.{server_name} uses unsupported `bearer_token`; set `bearer_token_env_var`."
|
|
|
|
|
);
|
|
|
|
|
return Err(std::io::Error::new(ErrorKind::InvalidData, message));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
2025-10-29 20:52:46 +00:00
|
|
|
/// Mark `project_path` as trusted inside the given `config.toml` document,
/// normalizing the `[projects]` section to explicit (non-inline) tables so
/// the rendered file stays human-friendly.
pub(crate) fn set_project_trusted_inner(
    doc: &mut DocumentMut,
    project_path: &Path,
) -> anyhow::Result<()> {
    // Ensure we render a human-friendly structure:
    //
    // [projects]
    // [projects."/path/to/project"]
    // trust_level = "trusted"
    //
    // rather than inline tables like:
    //
    // [projects]
    // "/path/to/project" = { trust_level = "trusted" }
    // Lossy conversion: non-UTF-8 path bytes are replaced, so the key may not
    // round-trip exactly on exotic filesystems.
    let project_key = project_path.to_string_lossy().to_string();

    // Ensure top-level `projects` exists as a non-inline, explicit table. If it
    // exists but was previously represented as a non-table (e.g., inline),
    // replace it with an explicit table.
    {
        let root = doc.as_table_mut();
        // If `projects` exists but isn't a standard table (e.g., it's an inline table),
        // convert it to an explicit table while preserving existing entries.
        let existing_projects = root.get("projects").cloned();
        if existing_projects.as_ref().is_none_or(|i| !i.is_table()) {
            let mut projects_tbl = toml_edit::Table::new();
            // Implicit so `[projects]` itself is not rendered unless needed.
            projects_tbl.set_implicit(true);

            // If there was an existing inline table, migrate its entries to explicit tables.
            // NOTE(review): non-inline-table entries under an inline `projects`
            // are dropped here — presumably they were invalid anyway; confirm.
            if let Some(inline_tbl) = existing_projects.as_ref().and_then(|i| i.as_inline_table()) {
                for (k, v) in inline_tbl.iter() {
                    if let Some(inner_tbl) = v.as_inline_table() {
                        let new_tbl = inner_tbl.clone().into_table();
                        projects_tbl.insert(k, toml_edit::Item::Table(new_tbl));
                    }
                }
            }

            root.insert("projects", toml_edit::Item::Table(projects_tbl));
        }
    }
    // Re-borrow through the document; the block above guarantees this is a table.
    let Some(projects_tbl) = doc["projects"].as_table_mut() else {
        return Err(anyhow::anyhow!(
            "projects table missing after initialization"
        ));
    };

    // Ensure the per-project entry is its own explicit table. If it exists but
    // is not a table (e.g., an inline table), replace it with an explicit table.
    let needs_proj_table = !projects_tbl.contains_key(project_key.as_str())
        || projects_tbl
            .get(project_key.as_str())
            .and_then(|i| i.as_table())
            .is_none();
    if needs_proj_table {
        projects_tbl.insert(project_key.as_str(), toml_edit::table());
    }
    let Some(proj_tbl) = projects_tbl
        .get_mut(project_key.as_str())
        .and_then(|i| i.as_table_mut())
    else {
        return Err(anyhow::anyhow!("project table missing for {project_key}"));
    };
    // Explicit so the `[projects."…"]` header is always emitted.
    proj_tbl.set_implicit(false);
    proj_tbl["trust_level"] = toml_edit::value("trusted");
    Ok(())
}
|
|
|
|
|
|
|
|
|
|
/// Patch `CODEX_HOME/config.toml` project state.
|
|
|
|
|
/// Use with caution.
|
|
|
|
|
pub fn set_project_trusted(codex_home: &Path, project_path: &Path) -> anyhow::Result<()> {
|
2025-10-30 10:28:32 +00:00
|
|
|
use crate::config::edit::ConfigEditsBuilder;
|
2025-09-11 15:04:29 -07:00
|
|
|
|
2025-10-29 20:52:46 +00:00
|
|
|
ConfigEditsBuilder::new(codex_home)
|
|
|
|
|
.set_project_trusted(project_path)
|
|
|
|
|
.apply_blocking()
|
2025-09-11 15:04:29 -07:00
|
|
|
}
|
|
|
|
|
|
feat: add support for -c/--config to override individual config items (#1137)
This PR introduces support for `-c`/`--config` so users can override
individual config values on the command line using `--config
name=value`. Example:
```
codex --config model=o4-mini
```
Making it possible to set arbitrary config values on the command line
results in a more flexible configuration scheme and makes it easier to
provide single-line examples that can be copy-pasted from documentation.
Effectively, it means there are four levels of configuration for some
values:
- Default value (e.g., `model` currently defaults to `o4-mini`)
- Value in `config.toml` (e.g., user could override the default to be
`model = "o3"` in their `config.toml`)
- Specifying `-c` or `--config` to override `model` (e.g., user can
include `-c model=o3` in their list of args to Codex)
- If available, a config-specific flag can be used, which takes
precedence over `-c` (e.g., user can specify `--model o3` in their list
of args to Codex)
Now that it is possible to specify anything that could be configured in
`config.toml` on the command line using `-c`, we do not need to have a
custom flag for every possible config option (which can clutter the
output of `--help`). To that end, as part of this PR, we drop support
for the `--disable-response-storage` flag, as users can now specify `-c
disable_response_storage=true` to get the equivalent functionality.
Under the hood, this works by loading the `config.toml` into a
`toml::Value`. Then for each `key=value`, we create a small synthetic
TOML file with `value` so that we can run the TOML parser to get the
equivalent `toml::Value`. We then parse `key` to determine the point in
the original `toml::Value` to do the insert/replace. Once all of the
overrides from `-c` args have been applied, the `toml::Value` is
deserialized into a `ConfigToml` and then the `ConfigOverrides` are
applied, as before.
2025-05-27 23:11:44 -07:00
|
|
|
/// Apply a single dotted-path override onto a TOML value.
|
|
|
|
|
fn apply_toml_override(root: &mut TomlValue, path: &str, value: TomlValue) {
|
|
|
|
|
use toml::value::Table;
|
|
|
|
|
|
|
|
|
|
let segments: Vec<&str> = path.split('.').collect();
|
|
|
|
|
let mut current = root;
|
|
|
|
|
|
|
|
|
|
for (idx, segment) in segments.iter().enumerate() {
|
|
|
|
|
let is_last = idx == segments.len() - 1;
|
|
|
|
|
|
|
|
|
|
if is_last {
|
|
|
|
|
match current {
|
|
|
|
|
TomlValue::Table(table) => {
|
|
|
|
|
table.insert(segment.to_string(), value);
|
|
|
|
|
}
|
|
|
|
|
_ => {
|
|
|
|
|
let mut table = Table::new();
|
|
|
|
|
table.insert(segment.to_string(), value);
|
|
|
|
|
*current = TomlValue::Table(table);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Traverse or create intermediate object.
|
|
|
|
|
match current {
|
|
|
|
|
TomlValue::Table(table) => {
|
|
|
|
|
current = table
|
|
|
|
|
.entry(segment.to_string())
|
|
|
|
|
.or_insert_with(|| TomlValue::Table(Table::new()));
|
|
|
|
|
}
|
|
|
|
|
_ => {
|
|
|
|
|
*current = TomlValue::Table(Table::new());
|
|
|
|
|
if let TomlValue::Table(tbl) = current {
|
|
|
|
|
current = tbl
|
|
|
|
|
.entry(segment.to_string())
|
|
|
|
|
.or_insert_with(|| TomlValue::Table(Table::new()));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
fix: overhaul SandboxPolicy and config loading in Rust (#732)
Previous to this PR, `SandboxPolicy` was a bit difficult to work with:
https://github.com/openai/codex/blob/237f8a11e11fdcc793a09e787e48215676d9b95b/codex-rs/core/src/protocol.rs#L98-L108
Specifically:
* It was an `enum` and therefore options were mutually exclusive as
opposed to additive.
* It defined things in terms of what the agent _could not_ do as opposed
to what they _could_ do. This made things hard to support because we
would prefer to build up a sandbox config by starting with something
extremely restrictive and only granting permissions for things the user
as explicitly allowed.
This PR changes things substantially by redefining the policy in terms
of two concepts:
* A `SandboxPermission` enum that defines permissions that can be
granted to the agent/sandbox.
* A `SandboxPolicy` that internally stores a `Vec<SandboxPermission>`,
but externally exposes a simpler API that can be used to configure
Seatbelt/Landlock.
Previous to this PR, we supported a `--sandbox` flag that effectively
mapped to an enum value in `SandboxPolicy`. Though now that
`SandboxPolicy` is a wrapper around `Vec<SandboxPermission>`, the single
`--sandbox` flag no longer makes sense. While I could have turned it
into a flag that the user can specify multiple times, I think the
current values to use with such a flag are long and potentially messy,
so for the moment, I have dropped support for `--sandbox` altogether and
we can bring it back once we have figured out the naming thing.
Since `--sandbox` is gone, users now have to specify `--full-auto` to
get a sandbox that allows writes in `cwd`. Admittedly, there is no clean
way to specify the equivalent of `--full-auto` in your `config.toml`
right now, so we will have to revisit that, as well.
Because `Config` presents a `SandboxPolicy` field and `SandboxPolicy`
changed considerably, I had to overhaul how config loading works, as
well. There are now two distinct concepts, `ConfigToml` and `Config`:
* `ConfigToml` is the deserialization of `~/.codex/config.toml`. As one
might expect, every field is `Optional` and it is `#[derive(Deserialize,
Default)]`. Consistent use of `Optional` makes it clear what the user
has specified explicitly.
* `Config` is the "normalized config" and is produced by merging
`ConfigToml` with `ConfigOverrides`. Where `ConfigToml` contains a raw
`Option<Vec<SandboxPermission>>`, `Config` presents only the final
`SandboxPolicy`.
The changes to `core/src/exec.rs` and `core/src/linux.rs` merit extra
special attention to ensure we are faithfully mapping the
`SandboxPolicy` to the Seatbelt and Landlock configs, respectively.
Also, take note that `core/src/seatbelt_readonly_policy.sbpl` has been
renamed to `codex-rs/core/src/seatbelt_base_policy.sbpl` and that
`(allow file-read*)` has been removed from the `.sbpl` file as now this
is added to the policy in `core/src/exec.rs` when
`sandbox_policy.has_full_disk_read_access()` is `true`.
2025-04-29 15:01:16 -07:00
|
|
|
/// Base config deserialized from ~/.codex/config.toml.
|
2025-09-11 23:44:17 -07:00
|
|
|
#[derive(Deserialize, Debug, Clone, Default, PartialEq)]
|
fix: overhaul SandboxPolicy and config loading in Rust (#732)
Previous to this PR, `SandboxPolicy` was a bit difficult to work with:
https://github.com/openai/codex/blob/237f8a11e11fdcc793a09e787e48215676d9b95b/codex-rs/core/src/protocol.rs#L98-L108
Specifically:
* It was an `enum` and therefore options were mutually exclusive as
opposed to additive.
* It defined things in terms of what the agent _could not_ do as opposed
to what they _could_ do. This made things hard to support because we
would prefer to build up a sandbox config by starting with something
extremely restrictive and only granting permissions for things the user
as explicitly allowed.
This PR changes things substantially by redefining the policy in terms
of two concepts:
* A `SandboxPermission` enum that defines permissions that can be
granted to the agent/sandbox.
* A `SandboxPolicy` that internally stores a `Vec<SandboxPermission>`,
but externally exposes a simpler API that can be used to configure
Seatbelt/Landlock.
Previous to this PR, we supported a `--sandbox` flag that effectively
mapped to an enum value in `SandboxPolicy`. Though now that
`SandboxPolicy` is a wrapper around `Vec<SandboxPermission>`, the single
`--sandbox` flag no longer makes sense. While I could have turned it
into a flag that the user can specify multiple times, I think the
current values to use with such a flag are long and potentially messy,
so for the moment, I have dropped support for `--sandbox` altogether and
we can bring it back once we have figured out the naming thing.
Since `--sandbox` is gone, users now have to specify `--full-auto` to
get a sandbox that allows writes in `cwd`. Admittedly, there is no clean
way to specify the equivalent of `--full-auto` in your `config.toml`
right now, so we will have to revisit that, as well.
Because `Config` presents a `SandboxPolicy` field and `SandboxPolicy`
changed considerably, I had to overhaul how config loading works, as
well. There are now two distinct concepts, `ConfigToml` and `Config`:
* `ConfigToml` is the deserialization of `~/.codex/config.toml`. As one
might expect, every field is `Optional` and it is `#[derive(Deserialize,
Default)]`. Consistent use of `Optional` makes it clear what the user
has specified explicitly.
* `Config` is the "normalized config" and is produced by merging
`ConfigToml` with `ConfigOverrides`. Where `ConfigToml` contains a raw
`Option<Vec<SandboxPermission>>`, `Config` presents only the final
`SandboxPolicy`.
The changes to `core/src/exec.rs` and `core/src/linux.rs` merit extra
special attention to ensure we are faithfully mapping the
`SandboxPolicy` to the Seatbelt and Landlock configs, respectively.
Also, take note that `core/src/seatbelt_readonly_policy.sbpl` has been
renamed to `codex-rs/core/src/seatbelt_base_policy.sbpl` and that
`(allow file-read*)` has been removed from the `.sbpl` file as now this
is added to the policy in `core/src/exec.rs` when
`sandbox_policy.has_full_disk_read_access()` is `true`.
2025-04-29 15:01:16 -07:00
|
|
|
pub struct ConfigToml {
|
|
|
|
|
/// Optional override of model selection.
|
|
|
|
|
pub model: Option<String>,
|
Review Mode (Core) (#3401)
## 📝 Review Mode -- Core
This PR introduces the Core implementation for Review mode:
- New op `Op::Review { prompt: String }:` spawns a child review task
with isolated context, a review‑specific system prompt, and a
`Config.review_model`.
- `EnteredReviewMode`: emitted when the child review session starts.
Every event from this point onwards reflects the review session.
- `ExitedReviewMode(Option<ReviewOutputEvent>)`: emitted when the review
finishes or is interrupted, with optional structured findings:
```json
{
"findings": [
{
"title": "<≤ 80 chars, imperative>",
"body": "<valid Markdown explaining *why* this is a problem; cite files/lines/functions>",
"confidence_score": <float 0.0-1.0>,
"priority": <int 0-3>,
"code_location": {
"absolute_file_path": "<file path>",
"line_range": {"start": <int>, "end": <int>}
}
}
],
"overall_correctness": "patch is correct" | "patch is incorrect",
"overall_explanation": "<1-3 sentence explanation justifying the overall_correctness verdict>",
"overall_confidence_score": <float 0.0-1.0>
}
```
## Questions
### Why separate out its own message history?
We want the review thread to match the training of our review models as
much as possible -- that means using a custom prompt, removing user
instructions, and starting a clean chat history.
We also want to make sure the review thread doesn't leak into the parent
thread.
### Why do this as a mode, vs. sub-agents?
1. We want review to be a synchronous task, so it's fine for now to do a
bespoke implementation.
2. We're still unclear about the final structure for sub-agents. We'd
prefer to land this quickly and then refactor into sub-agents without
rushing that implementation.
2025-09-12 16:25:10 -07:00
|
|
|
/// Review model override used by the `/review` feature.
|
|
|
|
|
pub review_model: Option<String>,
|
fix: overhaul SandboxPolicy and config loading in Rust (#732)
Previous to this PR, `SandboxPolicy` was a bit difficult to work with:
https://github.com/openai/codex/blob/237f8a11e11fdcc793a09e787e48215676d9b95b/codex-rs/core/src/protocol.rs#L98-L108
Specifically:
* It was an `enum` and therefore options were mutually exclusive as
opposed to additive.
* It defined things in terms of what the agent _could not_ do as opposed
to what they _could_ do. This made things hard to support because we
would prefer to build up a sandbox config by starting with something
extremely restrictive and only granting permissions for things the user
has explicitly allowed.
This PR changes things substantially by redefining the policy in terms
of two concepts:
* A `SandboxPermission` enum that defines permissions that can be
granted to the agent/sandbox.
* A `SandboxPolicy` that internally stores a `Vec<SandboxPermission>`,
but externally exposes a simpler API that can be used to configure
Seatbelt/Landlock.
Previous to this PR, we supported a `--sandbox` flag that effectively
mapped to an enum value in `SandboxPolicy`. Though now that
`SandboxPolicy` is a wrapper around `Vec<SandboxPermission>`, the single
`--sandbox` flag no longer makes sense. While I could have turned it
into a flag that the user can specify multiple times, I think the
current values to use with such a flag are long and potentially messy,
so for the moment, I have dropped support for `--sandbox` altogether and
we can bring it back once we have figured out the naming thing.
Since `--sandbox` is gone, users now have to specify `--full-auto` to
get a sandbox that allows writes in `cwd`. Admittedly, there is no clean
way to specify the equivalent of `--full-auto` in your `config.toml`
right now, so we will have to revisit that, as well.
Because `Config` presents a `SandboxPolicy` field and `SandboxPolicy`
changed considerably, I had to overhaul how config loading works, as
well. There are now two distinct concepts, `ConfigToml` and `Config`:
* `ConfigToml` is the deserialization of `~/.codex/config.toml`. As one
might expect, every field is `Optional` and it is `#[derive(Deserialize,
Default)]`. Consistent use of `Optional` makes it clear what the user
has specified explicitly.
* `Config` is the "normalized config" and is produced by merging
`ConfigToml` with `ConfigOverrides`. Where `ConfigToml` contains a raw
`Option<Vec<SandboxPermission>>`, `Config` presents only the final
`SandboxPolicy`.
The changes to `core/src/exec.rs` and `core/src/linux.rs` merit extra
special attention to ensure we are faithfully mapping the
`SandboxPolicy` to the Seatbelt and Landlock configs, respectively.
Also, take note that `core/src/seatbelt_readonly_policy.sbpl` has been
renamed to `codex-rs/core/src/seatbelt_base_policy.sbpl` and that
`(allow file-read*)` has been removed from the `.sbpl` file as now this
is added to the policy in `core/src/exec.rs` when
`sandbox_policy.has_full_disk_read_access()` is `true`.
2025-04-29 15:01:16 -07:00
|
|
|
|
2025-05-07 17:38:28 -07:00
|
|
|
/// Provider to use from the model_providers map.
|
|
|
|
|
pub model_provider: Option<String>,
|
|
|
|
|
|
feat: show number of tokens remaining in UI (#1388)
When using the OpenAI Responses API, we now record the `usage` field for
a `"response.completed"` event, which includes metrics about the number
of tokens consumed. We also introduce `openai_model_info.rs`, which
includes current data about the most common OpenAI models available via
the API (specifically `context_window` and `max_output_tokens`). If
Codex does not recognize the model, you can set `model_context_window`
and `model_max_output_tokens` explicitly in `config.toml`.
We then introduce a new event type to `protocol.rs`, `TokenCount`,
which includes the `TokenUsage` for the most recent turn.
Finally, we update the TUI to record the running sum of tokens used so
the percentage of available context window remaining can be reported via
the placeholder text for the composer:

We could certainly get much fancier with this (such as reporting the
estimated cost of the conversation), but for now, we are just trying to
achieve feature parity with the TypeScript CLI.
Though arguably this improves upon the TypeScript CLI, as the TypeScript
CLI uses heuristics to estimate the number of tokens used rather than
using the `usage` information directly:
https://github.com/openai/codex/blob/296996d74e345b1b05d8c3451a06ace21c5ada96/codex-cli/src/utils/approximate-tokens-used.ts#L3-L16
Fixes https://github.com/openai/codex/issues/1242
2025-06-25 23:31:11 -07:00
|
|
|
/// Size of the context window for the model, in tokens.
|
2025-10-20 11:29:49 -07:00
|
|
|
pub model_context_window: Option<i64>,
|
feat: show number of tokens remaining in UI (#1388)
When using the OpenAI Responses API, we now record the `usage` field for
a `"response.completed"` event, which includes metrics about the number
of tokens consumed. We also introduce `openai_model_info.rs`, which
includes current data about the most common OpenAI models available via
the API (specifically `context_window` and `max_output_tokens`). If
Codex does not recognize the model, you can set `model_context_window`
and `model_max_output_tokens` explicitly in `config.toml`.
We then introduce a new event type to `protocol.rs`, `TokenCount`,
which includes the `TokenUsage` for the most recent turn.
Finally, we update the TUI to record the running sum of tokens used so
the percentage of available context window remaining can be reported via
the placeholder text for the composer:

We could certainly get much fancier with this (such as reporting the
estimated cost of the conversation), but for now, we are just trying to
achieve feature parity with the TypeScript CLI.
Though arguably this improves upon the TypeScript CLI, as the TypeScript
CLI uses heuristics to estimate the number of tokens used rather than
using the `usage` information directly:
https://github.com/openai/codex/blob/296996d74e345b1b05d8c3451a06ace21c5ada96/codex-cli/src/utils/approximate-tokens-used.ts#L3-L16
Fixes https://github.com/openai/codex/issues/1242
2025-06-25 23:31:11 -07:00
|
|
|
|
|
|
|
|
/// Maximum number of output tokens.
|
2025-10-20 11:29:49 -07:00
|
|
|
pub model_max_output_tokens: Option<i64>,
|
feat: show number of tokens remaining in UI (#1388)
When using the OpenAI Responses API, we now record the `usage` field for
a `"response.completed"` event, which includes metrics about the number
of tokens consumed. We also introduce `openai_model_info.rs`, which
includes current data about the most common OpenAI models available via
the API (specifically `context_window` and `max_output_tokens`). If
Codex does not recognize the model, you can set `model_context_window`
and `model_max_output_tokens` explicitly in `config.toml`.
We then introduce a new event type to `protocol.rs`, `TokenCount`,
which includes the `TokenUsage` for the most recent turn.
Finally, we update the TUI to record the running sum of tokens used so
the percentage of available context window remaining can be reported via
the placeholder text for the composer:

We could certainly get much fancier with this (such as reporting the
estimated cost of the conversation), but for now, we are just trying to
achieve feature parity with the TypeScript CLI.
Though arguably this improves upon the TypeScript CLI, as the TypeScript
CLI uses heuristics to estimate the number of tokens used rather than
using the `usage` information directly:
https://github.com/openai/codex/blob/296996d74e345b1b05d8c3451a06ace21c5ada96/codex-cli/src/utils/approximate-tokens-used.ts#L3-L16
Fixes https://github.com/openai/codex/issues/1242
2025-06-25 23:31:11 -07:00
|
|
|
|
2025-09-12 13:07:10 -07:00
|
|
|
/// Token usage threshold triggering auto-compaction of conversation history.
|
|
|
|
|
pub model_auto_compact_token_limit: Option<i64>,
|
|
|
|
|
|
fix: overhaul SandboxPolicy and config loading in Rust (#732)
Previous to this PR, `SandboxPolicy` was a bit difficult to work with:
https://github.com/openai/codex/blob/237f8a11e11fdcc793a09e787e48215676d9b95b/codex-rs/core/src/protocol.rs#L98-L108
Specifically:
* It was an `enum` and therefore options were mutually exclusive as
opposed to additive.
* It defined things in terms of what the agent _could not_ do as opposed
to what they _could_ do. This made things hard to support because we
would prefer to build up a sandbox config by starting with something
extremely restrictive and only granting permissions for things the user
has explicitly allowed.
This PR changes things substantially by redefining the policy in terms
of two concepts:
* A `SandboxPermission` enum that defines permissions that can be
granted to the agent/sandbox.
* A `SandboxPolicy` that internally stores a `Vec<SandboxPermission>`,
but externally exposes a simpler API that can be used to configure
Seatbelt/Landlock.
Previous to this PR, we supported a `--sandbox` flag that effectively
mapped to an enum value in `SandboxPolicy`. Though now that
`SandboxPolicy` is a wrapper around `Vec<SandboxPermission>`, the single
`--sandbox` flag no longer makes sense. While I could have turned it
into a flag that the user can specify multiple times, I think the
current values to use with such a flag are long and potentially messy,
so for the moment, I have dropped support for `--sandbox` altogether and
we can bring it back once we have figured out the naming thing.
Since `--sandbox` is gone, users now have to specify `--full-auto` to
get a sandbox that allows writes in `cwd`. Admittedly, there is no clean
way to specify the equivalent of `--full-auto` in your `config.toml`
right now, so we will have to revisit that, as well.
Because `Config` presents a `SandboxPolicy` field and `SandboxPolicy`
changed considerably, I had to overhaul how config loading works, as
well. There are now two distinct concepts, `ConfigToml` and `Config`:
* `ConfigToml` is the deserialization of `~/.codex/config.toml`. As one
might expect, every field is `Optional` and it is `#[derive(Deserialize,
Default)]`. Consistent use of `Optional` makes it clear what the user
has specified explicitly.
* `Config` is the "normalized config" and is produced by merging
`ConfigToml` with `ConfigOverrides`. Where `ConfigToml` contains a raw
`Option<Vec<SandboxPermission>>`, `Config` presents only the final
`SandboxPolicy`.
The changes to `core/src/exec.rs` and `core/src/linux.rs` merit extra
special attention to ensure we are faithfully mapping the
`SandboxPolicy` to the Seatbelt and Landlock configs, respectively.
Also, take note that `core/src/seatbelt_readonly_policy.sbpl` has been
renamed to `codex-rs/core/src/seatbelt_base_policy.sbpl` and that
`(allow file-read*)` has been removed from the `.sbpl` file as now this
is added to the policy in `core/src/exec.rs` when
`sandbox_policy.has_full_disk_read_access()` is `true`.
2025-04-29 15:01:16 -07:00
|
|
|
/// Default approval policy for executing commands.
|
|
|
|
|
pub approval_policy: Option<AskForApproval>,
|
|
|
|
|
|
feat: introduce support for shell_environment_policy in config.toml (#1061)
To date, when handling `shell` and `local_shell` tool calls, we were
spawning new processes using the environment inherited from the Codex
process itself. This means that the sensitive `OPENAI_API_KEY` that
Codex needs to talk to OpenAI models was made available to everything
run by `shell` and `local_shell`. While there are cases where that might
be useful, it does not seem like a good default.
This PR introduces a complex `shell_environment_policy` config option to
control the `env` used with these tool calls. It is inevitably a bit
complex so that it is possible to override individual components of the
policy without having to restate the entire thing.
Details are in the updated `README.md` in this PR, but here is the
relevant bit that explains the individual fields of
`shell_environment_policy`:
| Field | Type | Default | Description |
| ------------------------- | -------------------------- | ------- |
-----------------------------------------------------------------------------------------------------------------------------------------------
|
| `inherit` | string | `core` | Starting template for the
environment:<br>`core` (`HOME`, `PATH`, `USER`, …), `all` (clone full
parent env), or `none` (start empty). |
| `ignore_default_excludes` | boolean | `false` | When `false`, Codex
removes any var whose **name** contains `KEY`, `SECRET`, or `TOKEN`
(case-insensitive) before other rules run. |
| `exclude` | array<string> | `[]` | Case-insensitive glob
patterns to drop after the default filter.<br>Examples: `"AWS_*"`,
`"AZURE_*"`. |
| `set` | table<string,string> | `{}` | Explicit key/value
overrides or additions – always win over inherited values. |
| `include_only` | array<string> | `[]` | If non-empty, a
whitelist of patterns; only variables that match _one_ pattern survive
the final step. (Generally used with `inherit = "all"`.) |
In particular, note that the default is `inherit = "core"`, so:
* if you have extra env variables that you want to inherit from the
parent process, use `inherit = "all"` and then specify `include_only`
* if you have extra env variables where you want to hardcode the values,
the default `inherit = "core"` will work fine, but then you need to
specify `set`
This configuration is not battle-tested, so we will probably still have
to play with it a bit. `core/src/exec_env.rs` has the critical business
logic as well as unit tests.
Though if nothing else, previous to this change:
```
$ cargo run --bin codex -- debug seatbelt -- printenv OPENAI_API_KEY
# ...prints OPENAI_API_KEY...
```
But after this change it does not print anything (as desired).
One final thing to call out about this PR is that the
`configure_command!` macro we use in `core/src/exec.rs` has to do some
complex logic with respect to how it builds up the `env` for the process
being spawned under Landlock/seccomp. Specifically, doing
`cmd.env_clear()` followed by `cmd.envs(&$env_map)` (which is arguably
the most intuitive way to do it) caused the Landlock unit tests to fail
because the processes spawned by the unit tests started failing in
unexpected ways! If we forgo `env_clear()` in favor of updating env vars
one at a time, the tests still pass. The comment in the code talks about
this a bit, and while I would like to investigate this more, I need to
move on for the moment, but I do plan to come back to it to fully
understand what is going on. For example, this suggests that we might
not be able to spawn a C program that calls `env_clear()`, which would
be...weird. We may still have to fiddle with our Landlock config if that
is the case.
2025-05-22 09:51:19 -07:00
|
|
|
#[serde(default)]
|
|
|
|
|
pub shell_environment_policy: ShellEnvironmentPolicyToml,
|
|
|
|
|
|
feat: add support for --sandbox flag (#1476)
On a high-level, we try to design `config.toml` so that you don't have
to "comment out a lot of stuff" when testing different options.
Previously, defining a sandbox policy was somewhat at odds with this
principle because you would define the policy as attributes of
`[sandbox]` like so:
```toml
[sandbox]
mode = "workspace-write"
writable_roots = [ "/tmp" ]
```
but if you wanted to temporarily change to a read-only sandbox, you
might feel compelled to modify your file to be:
```toml
[sandbox]
mode = "read-only"
# mode = "workspace-write"
# writable_roots = [ "/tmp" ]
```
Technically, commenting out `writable_roots` would not be strictly
necessary, as `mode = "read-only"` would ignore `writable_roots`, but
it's still a reasonable thing to do to keep things tidy.
Currently, the various values for `mode` do not support that many
attributes, so this is not that hard to maintain, but one could imagine
this becoming more complex in the future.
In this PR, we change Codex CLI so that it no longer recognizes
`[sandbox]`. Instead, it introduces a top-level option, `sandbox_mode`,
and `[sandbox_workspace_write]` is used to further configure the sandbox
when `sandbox_mode = "workspace-write"` is used:
```toml
sandbox_mode = "workspace-write"
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
```
This feels a bit more future-proof in that it is less tedious to
configure different sandboxes:
```toml
sandbox_mode = "workspace-write"
[sandbox_read_only]
# read-only options here...
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
[sandbox_danger_full_access]
# danger-full-access options here...
```
In this scheme, you never need to comment out the configuration for an
individual sandbox type: you only need to redefine `sandbox_mode`.
Relatedly, previous to this change, a user had to do `-c
sandbox.mode=read-only` to change the mode on the command line. With
this change, things are arguably a bit cleaner because the equivalent
option is `-c sandbox_mode=read-only` (and now `-c
sandbox_workspace_write=...` can be set separately).
Though more importantly, we introduce the `-s/--sandbox` option to the
CLI, which maps directly to `sandbox_mode` in `config.toml`, making
config override behavior easier to reason about. Moreover, as you can
see in the updates to the various Markdown files, it is much easier to
explain how to configure sandboxing when things like `--sandbox
read-only` can be used as an example.
Relatedly, this cleanup also made it straightforward to add support for
a `sandbox` option for Codex when used as an MCP server (see the changes
to `mcp-server/src/codex_tool_config.rs`).
Fixes https://github.com/openai/codex/issues/1248.
2025-07-07 22:31:30 -07:00
|
|
|
/// Sandbox mode to use.
|
|
|
|
|
pub sandbox_mode: Option<SandboxMode>,
|
|
|
|
|
|
|
|
|
|
/// Sandbox configuration to apply if `sandbox` is `WorkspaceWrite`.
|
2025-08-07 01:30:13 -07:00
|
|
|
pub sandbox_workspace_write: Option<SandboxWorkspaceWrite>,
|
fix: overhaul SandboxPolicy and config loading in Rust (#732)
Previous to this PR, `SandboxPolicy` was a bit difficult to work with:
https://github.com/openai/codex/blob/237f8a11e11fdcc793a09e787e48215676d9b95b/codex-rs/core/src/protocol.rs#L98-L108
Specifically:
* It was an `enum` and therefore options were mutually exclusive as
opposed to additive.
* It defined things in terms of what the agent _could not_ do as opposed
to what they _could_ do. This made things hard to support because we
would prefer to build up a sandbox config by starting with something
extremely restrictive and only granting permissions for things the user
has explicitly allowed.
This PR changes things substantially by redefining the policy in terms
of two concepts:
* A `SandboxPermission` enum that defines permissions that can be
granted to the agent/sandbox.
* A `SandboxPolicy` that internally stores a `Vec<SandboxPermission>`,
but externally exposes a simpler API that can be used to configure
Seatbelt/Landlock.
Previous to this PR, we supported a `--sandbox` flag that effectively
mapped to an enum value in `SandboxPolicy`. Though now that
`SandboxPolicy` is a wrapper around `Vec<SandboxPermission>`, the single
`--sandbox` flag no longer makes sense. While I could have turned it
into a flag that the user can specify multiple times, I think the
current values to use with such a flag are long and potentially messy,
so for the moment, I have dropped support for `--sandbox` altogether and
we can bring it back once we have figured out the naming thing.
Since `--sandbox` is gone, users now have to specify `--full-auto` to
get a sandbox that allows writes in `cwd`. Admittedly, there is no clean
way to specify the equivalent of `--full-auto` in your `config.toml`
right now, so we will have to revisit that, as well.
Because `Config` presents a `SandboxPolicy` field and `SandboxPolicy`
changed considerably, I had to overhaul how config loading works, as
well. There are now two distinct concepts, `ConfigToml` and `Config`:
* `ConfigToml` is the deserialization of `~/.codex/config.toml`. As one
might expect, every field is `Optional` and it is `#[derive(Deserialize,
Default)]`. Consistent use of `Optional` makes it clear what the user
has specified explicitly.
* `Config` is the "normalized config" and is produced by merging
`ConfigToml` with `ConfigOverrides`. Where `ConfigToml` contains a raw
`Option<Vec<SandboxPermission>>`, `Config` presents only the final
`SandboxPolicy`.
The changes to `core/src/exec.rs` and `core/src/linux.rs` merit extra
special attention to ensure we are faithfully mapping the
`SandboxPolicy` to the Seatbelt and Landlock configs, respectively.
Also, take note that `core/src/seatbelt_readonly_policy.sbpl` has been
renamed to `codex-rs/core/src/seatbelt_base_policy.sbpl` and that
`(allow file-read*)` has been removed from the `.sbpl` file as now this
is added to the policy in `core/src/exec.rs` when
`sandbox_policy.has_full_disk_read_access()` is `true`.
2025-04-29 15:01:16 -07:00
|
|
|
|
feat: configurable notifications in the Rust CLI (#793)
With this change, you can specify a program that will be executed to get
notified about events generated by Codex. The notification info will be
packaged as a JSON object. The supported notification types are defined
by the `UserNotification` enum introduced in this PR. Initially, it
contains only one variant, `AgentTurnComplete`:
```rust
pub(crate) enum UserNotification {
#[serde(rename_all = "kebab-case")]
AgentTurnComplete {
turn_id: String,
/// Messages that the user sent to the agent to initiate the turn.
input_messages: Vec<String>,
/// The last message sent by the assistant in the turn.
last_assistant_message: Option<String>,
},
}
```
This is intended to support the common case when a "turn" ends, which
often means it is now your chance to give Codex further instructions.
For example, I have the following in my `~/.codex/config.toml`:
```toml
notify = ["python3", "/Users/mbolin/.codex/notify.py"]
```
I created my own custom notifier script that calls out to
[terminal-notifier](https://github.com/julienXX/terminal-notifier) to
show a desktop push notification on macOS. Contents of `notify.py`:
```python
#!/usr/bin/env python3
import json
import subprocess
import sys
def main() -> int:
if len(sys.argv) != 2:
print("Usage: notify.py <NOTIFICATION_JSON>")
return 1
try:
notification = json.loads(sys.argv[1])
except json.JSONDecodeError:
return 1
match notification_type := notification.get("type"):
case "agent-turn-complete":
assistant_message = notification.get("last-assistant-message")
if assistant_message:
title = f"Codex: {assistant_message}"
else:
title = "Codex: Turn Complete!"
input_messages = notification.get("input_messages", [])
message = " ".join(input_messages)
title += message
case _:
print(f"not sending a push notification for: {notification_type}")
return 0
subprocess.check_output(
[
"terminal-notifier",
"-title",
title,
"-message",
message,
"-group",
"codex",
"-ignoreDnD",
"-activate",
"com.googlecode.iterm2",
]
)
return 0
if __name__ == "__main__":
sys.exit(main())
```
For reference, here are related PRs that tried to add this functionality
to the TypeScript version of the Codex CLI:
* https://github.com/openai/codex/pull/160
* https://github.com/openai/codex/pull/498
2025-05-02 19:48:13 -07:00
|
|
|
/// Optional external command to spawn for end-user notifications.
|
|
|
|
|
#[serde(default)]
|
|
|
|
|
pub notify: Option<Vec<String>>,
|
|
|
|
|
|
fix: overhaul SandboxPolicy and config loading in Rust (#732)
Previous to this PR, `SandboxPolicy` was a bit difficult to work with:
https://github.com/openai/codex/blob/237f8a11e11fdcc793a09e787e48215676d9b95b/codex-rs/core/src/protocol.rs#L98-L108
Specifically:
* It was an `enum` and therefore options were mutually exclusive as
opposed to additive.
* It defined things in terms of what the agent _could not_ do as opposed
to what they _could_ do. This made things hard to support because we
would prefer to build up a sandbox config by starting with something
extremely restrictive and only granting permissions for things the user
has explicitly allowed.
This PR changes things substantially by redefining the policy in terms
of two concepts:
* A `SandboxPermission` enum that defines permissions that can be
granted to the agent/sandbox.
* A `SandboxPolicy` that internally stores a `Vec<SandboxPermission>`,
but externally exposes a simpler API that can be used to configure
Seatbelt/Landlock.
Previous to this PR, we supported a `--sandbox` flag that effectively
mapped to an enum value in `SandboxPolicy`. Though now that
`SandboxPolicy` is a wrapper around `Vec<SandboxPermission>`, the single
`--sandbox` flag no longer makes sense. While I could have turned it
into a flag that the user can specify multiple times, I think the
current values to use with such a flag are long and potentially messy,
so for the moment, I have dropped support for `--sandbox` altogether and
we can bring it back once we have figured out the naming thing.
Since `--sandbox` is gone, users now have to specify `--full-auto` to
get a sandbox that allows writes in `cwd`. Admittedly, there is no clean
way to specify the equivalent of `--full-auto` in your `config.toml`
right now, so we will have to revisit that, as well.
Because `Config` presents a `SandboxPolicy` field and `SandboxPolicy`
changed considerably, I had to overhaul how config loading works, as
well. There are now two distinct concepts, `ConfigToml` and `Config`:
* `ConfigToml` is the deserialization of `~/.codex/config.toml`. As one
might expect, every field is `Optional` and it is `#[derive(Deserialize,
Default)]`. Consistent use of `Optional` makes it clear what the user
has specified explicitly.
* `Config` is the "normalized config" and is produced by merging
`ConfigToml` with `ConfigOverrides`. Where `ConfigToml` contains a raw
`Option<Vec<SandboxPermission>>`, `Config` presents only the final
`SandboxPolicy`.
The changes to `core/src/exec.rs` and `core/src/linux.rs` merit extra
special attention to ensure we are faithfully mapping the
`SandboxPolicy` to the Seatbelt and Landlock configs, respectively.
Also, take note that `core/src/seatbelt_readonly_policy.sbpl` has been
renamed to `codex-rs/core/src/seatbelt_base_policy.sbpl` and that
`(allow file-read*)` has been removed from the `.sbpl` file as now this
is added to the policy in `core/src/exec.rs` when
`sandbox_policy.has_full_disk_read_access()` is `true`.
2025-04-29 15:01:16 -07:00
|
|
|
/// System instructions.
|
|
|
|
|
pub instructions: Option<String>,
|
feat: support mcp_servers in config.toml (#829)
This adds initial support for MCP servers in the style of Claude Desktop
and Cursor. Note this PR is the bare minimum to get things working end
to end: all configured MCP servers are launched every time Codex is run,
there is no recovery for MCP servers that crash, etc.
(Also, I took some shortcuts to change some fields of `Session` to be
`pub(crate)`, which also means there are circular deps between
`codex.rs` and `mcp_tool_call.rs`, but I will clean that up in a
subsequent PR.)
`codex-rs/README.md` is updated as part of this PR to explain how to use
this feature. There is a bit of plumbing to route the new settings from
`Config` to the business logic in `codex.rs`. The most significant
chunks for new code are in `mcp_connection_manager.rs` (which defines
the `McpConnectionManager` struct) and `mcp_tool_call.rs`, which is
responsible for tool calls.
This PR also introduces new `McpToolCallBegin` and `McpToolCallEnd`
event types to the protocol, but does not add any handlers for them.
(See https://github.com/openai/codex/pull/836 for initial usage.)
To test, I added the following to my `~/.codex/config.toml`:
```toml
# Local build of https://github.com/hideya/mcp-server-weather-js
[mcp_servers.weather]
command = "/Users/mbolin/code/mcp-server-weather-js/dist/index.js"
args = []
```
And then I ran the following:
```
codex-rs$ cargo run --bin codex exec 'what is the weather in san francisco'
[2025-05-06T22:40:05] Task started: 1
[2025-05-06T22:40:18] Agent message: Here’s the latest National Weather Service forecast for San Francisco (downtown, near 37.77° N, 122.42° W):
This Afternoon (Tue):
• Sunny, high near 69 °F
• West-southwest wind around 12 mph
Tonight:
• Partly cloudy, low around 52 °F
• SW wind 7–10 mph
...
```
Note that Codex itself is not able to make network calls, so it would
not normally be able to get live weather information like this. However,
the weather MCP is [currently] not run under the Codex sandbox, so it is
able to hit `api.weather.gov` and fetch current weather information.
---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed
with [ReviewStack](https://reviewstack.dev/openai/codex/pull/829).
* #836
* __->__ #829
2025-05-06 15:47:59 -07:00
|
|
|
|
2025-10-20 08:50:54 -07:00
|
|
|
/// When set, restricts ChatGPT login to a specific workspace identifier.
|
|
|
|
|
#[serde(default)]
|
|
|
|
|
pub forced_chatgpt_workspace_id: Option<String>,
|
|
|
|
|
|
|
|
|
|
/// When set, restricts the login mechanism users may use.
|
|
|
|
|
#[serde(default)]
|
|
|
|
|
pub forced_login_method: Option<ForcedLoginMethod>,
|
|
|
|
|
|
2025-10-27 19:41:49 -07:00
|
|
|
/// Preferred backend for storing CLI auth credentials.
|
|
|
|
|
/// file (default): Use a file in the Codex home directory.
|
|
|
|
|
/// keyring: Use an OS-specific keyring service.
|
|
|
|
|
/// auto: Use the keyring if available, otherwise use a file.
|
|
|
|
|
#[serde(default)]
|
|
|
|
|
pub cli_auth_credentials_store: Option<AuthCredentialsStoreMode>,
|
|
|
|
|
|
feat: support mcp_servers in config.toml (#829)
This adds initial support for MCP servers in the style of Claude Desktop
and Cursor. Note this PR is the bare minimum to get things working end
to end: all configured MCP servers are launched every time Codex is run,
there is no recovery for MCP servers that crash, etc.
(Also, I took some shortcuts to change some fields of `Session` to be
`pub(crate)`, which also means there are circular deps between
`codex.rs` and `mcp_tool_call.rs`, but I will clean that up in a
subsequent PR.)
`codex-rs/README.md` is updated as part of this PR to explain how to use
this feature. There is a bit of plumbing to route the new settings from
`Config` to the business logic in `codex.rs`. The most significant
chunks for new code are in `mcp_connection_manager.rs` (which defines
the `McpConnectionManager` struct) and `mcp_tool_call.rs`, which is
responsible for tool calls.
This PR also introduces new `McpToolCallBegin` and `McpToolCallEnd`
event types to the protocol, but does not add any handlers for them.
(See https://github.com/openai/codex/pull/836 for initial usage.)
To test, I added the following to my `~/.codex/config.toml`:
```toml
# Local build of https://github.com/hideya/mcp-server-weather-js
[mcp_servers.weather]
command = "/Users/mbolin/code/mcp-server-weather-js/dist/index.js"
args = []
```
And then I ran the following:
```
codex-rs$ cargo run --bin codex exec 'what is the weather in san francisco'
[2025-05-06T22:40:05] Task started: 1
[2025-05-06T22:40:18] Agent message: Here’s the latest National Weather Service forecast for San Francisco (downtown, near 37.77° N, 122.42° W):
This Afternoon (Tue):
• Sunny, high near 69 °F
• West-southwest wind around 12 mph
Tonight:
• Partly cloudy, low around 52 °F
• SW wind 7–10 mph
...
```
Note that Codex itself is not able to make network calls, so it would
not normally be able to get live weather information like this. However,
the weather MCP is [currently] not run under the Codex sandbox, so it is
able to hit `api.weather.gov` and fetch current weather information.
---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed
with [ReviewStack](https://reviewstack.dev/openai/codex/pull/829).
* #836
* __->__ #829
2025-05-06 15:47:59 -07:00
|
|
|
/// Definition for MCP servers that Codex can reach out to for tool calls.
|
|
|
|
|
#[serde(default)]
|
|
|
|
|
pub mcp_servers: HashMap<String, McpServerConfig>,
|
2025-05-07 17:38:28 -07:00
|
|
|
|
2025-10-07 19:39:32 -07:00
|
|
|
/// Preferred backend for storing MCP OAuth credentials.
|
|
|
|
|
/// keyring: Use an OS-specific keyring service.
|
|
|
|
|
/// https://github.com/openai/codex/blob/main/codex-rs/rmcp-client/src/oauth.rs#L2
|
|
|
|
|
/// file: Use a file in the Codex home directory.
|
|
|
|
|
/// auto (default): Use the OS-specific keyring service if available, otherwise use a file.
|
|
|
|
|
#[serde(default)]
|
|
|
|
|
pub mcp_oauth_credentials_store: Option<OAuthCredentialsStoreMode>,
|
|
|
|
|
|
2025-05-07 17:38:28 -07:00
|
|
|
/// User-defined provider entries that extend/override the built-in list.
|
|
|
|
|
#[serde(default)]
|
|
|
|
|
pub model_providers: HashMap<String, ModelProviderInfo>,
|
2025-05-10 17:52:59 -07:00
|
|
|
|
|
|
|
|
/// Maximum number of bytes to include from an AGENTS.md project doc file.
|
|
|
|
|
pub project_doc_max_bytes: Option<usize>,
|
2025-05-13 16:52:52 -07:00
|
|
|
|
2025-10-01 11:19:59 -07:00
|
|
|
/// Ordered list of fallback filenames to look for when AGENTS.md is missing.
|
|
|
|
|
pub project_doc_fallback_filenames: Option<Vec<String>>,
|
|
|
|
|
|
2025-05-13 16:52:52 -07:00
|
|
|
/// Profile to use from the `profiles` map.
|
|
|
|
|
pub profile: Option<String>,
|
|
|
|
|
|
|
|
|
|
/// Named profiles to facilitate switching between different configurations.
|
|
|
|
|
#[serde(default)]
|
|
|
|
|
pub profiles: HashMap<String, ConfigProfile>,
|
feat: record messages from user in ~/.codex/history.jsonl (#939)
This is a large change to support a "history" feature like you would
expect in a shell like Bash.
History events are recorded in `$CODEX_HOME/history.jsonl`. Because it
is a JSONL file, it is straightforward to append new entries (as opposed
to the TypeScript file that uses `$CODEX_HOME/history.json`, so to be
valid JSON, each new entry entails rewriting the entire file). Because
it is possible for there to be multiple instances of Codex CLI writing
to `history.jsonl` at once, we use advisory file locking when working
with `history.jsonl` in `codex-rs/core/src/message_history.rs`.
Because we believe history is a sufficiently useful feature, we enable
it by default. Though to provide some safety, we set the file
permissions of `history.jsonl` to be `o600` so that other users on the
system cannot read the user's history. We do not yet support a default
list of `SENSITIVE_PATTERNS` as the TypeScript CLI does:
https://github.com/openai/codex/blob/3fdf9df1335ac9501e3fb0e61715359145711e8b/codex-cli/src/utils/storage/command-history.ts#L10-L17
We are going to take a more conservative approach to this list in the
Rust CLI. For example, while `/\b[A-Za-z0-9-_]{20,}\b/` might exclude
sensitive information like API tokens, it would also exclude valuable
information such as references to Git commits.
As noted in the updated documentation, users can opt-out of history by
adding the following to `config.toml`:
```toml
[history]
persistence = "none"
```
Because `history.jsonl` could, in theory, be quite large, we take a[n
arguably overly pedantic] approach in reading history entries into
memory. Specifically, we start by telling the client the current number
of entries in the history file (`history_entry_count`) as well as the
inode (`history_log_id`) of `history.jsonl` (see the new fields on
`SessionConfiguredEvent`).
The client is responsible for keeping new entries in memory to create a
"local history," but if the user hits up enough times to go "past" the
end of local history, then the client should use the new
`GetHistoryEntryRequest` in the protocol to fetch older entries.
Specifically, it should pass the `history_log_id` it was given
originally and work backwards from `history_entry_count`. (It should
really fetch history in batches rather than one-at-a-time, but that is
something we can improve upon in subsequent PRs.)
The motivation behind this crazy scheme is that it is designed to defend
against:
* The `history.jsonl` being truncated during the session such that the
index into the history is no longer consistent with what had been read
up to that point. We do not yet have logic to enforce a `max_bytes` for
`history.jsonl`, but once we do, we will aspire to implement it in a way
that should result in a new inode for the file on most systems.
* New items from concurrent Codex CLI sessions amending to the history.
Because, in absence of truncation, `history.jsonl` is an append-only
log, so long as the client reads backwards from `history_entry_count`,
it should always get a consistent view of history. (That said, it will
not be able to read _new_ commands from concurrent sessions, but perhaps
we will introduce a `/` command to reload latest history or something
down the road.)
Admittedly, my testing of this feature thus far has been fairly light. I
expect we will find bugs and introduce enhancements/fixes going forward.
2025-05-15 16:26:23 -07:00
|
|
|
|
|
|
|
|
/// Settings that govern if and what will be written to `~/.codex/history.jsonl`.
|
|
|
|
|
#[serde(default)]
|
|
|
|
|
pub history: Option<History>,
|
2025-05-16 11:33:08 -07:00
|
|
|
|
|
|
|
|
/// Optional URI-based file opener. If set, citations to files in the model
|
|
|
|
|
/// output will be hyperlinked using the specified URI scheme.
|
|
|
|
|
pub file_opener: Option<UriBasedFileOpener>,
|
2025-05-16 16:16:50 -07:00
|
|
|
|
|
|
|
|
/// Collection of settings that are specific to the TUI.
|
|
|
|
|
pub tui: Option<Tui>,
|
2025-05-30 23:14:56 -07:00
|
|
|
|
|
|
|
|
/// When set to `true`, `AgentReasoning` events will be hidden from the
|
|
|
|
|
/// UI/output. Defaults to `false`.
|
|
|
|
|
pub hide_agent_reasoning: Option<bool>,
|
feat: make reasoning effort/summaries configurable (#1199)
Previous to this PR, we always set `reasoning` when making a request
using the Responses API:
https://github.com/openai/codex/blob/d7245cbbc9d8ff5446da45e5951761103492476d/codex-rs/core/src/client.rs#L108-L111
Though if you tried to use the Rust CLI with `--model gpt-4.1`, this
would fail with:
```shell
"Unsupported parameter: 'reasoning.effort' is not supported with this model."
```
We take a cue from the TypeScript CLI, which does a check on the model
name:
https://github.com/openai/codex/blob/d7245cbbc9d8ff5446da45e5951761103492476d/codex-cli/src/utils/agent/agent-loop.ts#L786-L789
This PR does a similar check, though also adds support for the following
config options:
```
model_reasoning_effort = "low" | "medium" | "high" | "none"
model_reasoning_summary = "auto" | "concise" | "detailed" | "none"
```
This way, if you have a model whose name happens to start with `"o"` (or
`"codex"`?), you can set these to `"none"` to explicitly disable
reasoning, if necessary. (That said, it seems unlikely anyone would use
the Responses API with non-OpenAI models, but we provide an escape
hatch, anyway.)
This PR also updates both the TUI and `codex exec` to show `reasoning
effort` and `reasoning summaries` in the header.
2025-06-02 16:01:34 -07:00
|
|
|
|
2025-08-05 01:56:13 -07:00
|
|
|
/// When set to `true`, `AgentReasoningRawContentEvent` events will be shown in the UI/output.
|
|
|
|
|
/// Defaults to `false`.
|
|
|
|
|
pub show_raw_agent_reasoning: Option<bool>,
|
|
|
|
|
|
feat: make reasoning effort/summaries configurable (#1199)
Previous to this PR, we always set `reasoning` when making a request
using the Responses API:
https://github.com/openai/codex/blob/d7245cbbc9d8ff5446da45e5951761103492476d/codex-rs/core/src/client.rs#L108-L111
Though if you tried to use the Rust CLI with `--model gpt-4.1`, this
would fail with:
```shell
"Unsupported parameter: 'reasoning.effort' is not supported with this model."
```
We take a cue from the TypeScript CLI, which does a check on the model
name:
https://github.com/openai/codex/blob/d7245cbbc9d8ff5446da45e5951761103492476d/codex-cli/src/utils/agent/agent-loop.ts#L786-L789
This PR does a similar check, though also adds support for the following
config options:
```
model_reasoning_effort = "low" | "medium" | "high" | "none"
model_reasoning_summary = "auto" | "concise" | "detailed" | "none"
```
This way, if you have a model whose name happens to start with `"o"` (or
`"codex"`?), you can set these to `"none"` to explicitly disable
reasoning, if necessary. (That said, it seems unlikely anyone would use
the Responses API with non-OpenAI models, but we provide an escape
hatch, anyway.)
This PR also updates both the TUI and `codex exec` to show `reasoning
effort` and `reasoning summaries` in the header.
2025-06-02 16:01:34 -07:00
|
|
|
pub model_reasoning_effort: Option<ReasoningEffort>,
|
|
|
|
|
pub model_reasoning_summary: Option<ReasoningSummary>,
|
2025-08-22 17:12:10 +01:00
|
|
|
/// Optional verbosity control for GPT-5 models (Responses API `text.verbosity`).
|
|
|
|
|
pub model_verbosity: Option<Verbosity>,
|
2025-07-10 14:30:33 -07:00
|
|
|
|
|
|
|
|
/// Override to force-enable reasoning summaries for the configured model.
|
|
|
|
|
pub model_supports_reasoning_summaries: Option<bool>,
|
2025-07-11 13:30:11 -04:00
|
|
|
|
2025-09-04 11:00:01 -07:00
|
|
|
/// Override to force reasoning summary format for the configured model.
|
|
|
|
|
pub model_reasoning_summary_format: Option<ReasoningSummaryFormat>,
|
|
|
|
|
|
2025-07-11 13:30:11 -04:00
|
|
|
/// Base URL for requests to ChatGPT (as opposed to the OpenAI API).
|
|
|
|
|
pub chatgpt_base_url: Option<String>,
|
2025-07-18 17:04:04 -07:00
|
|
|
|
2025-08-07 09:27:38 -07:00
|
|
|
pub projects: Option<HashMap<String, ProjectConfig>>,
|
2025-08-18 20:22:48 -07:00
|
|
|
|
2025-08-23 22:58:56 -07:00
|
|
|
/// Nested tools section for feature toggles
|
|
|
|
|
pub tools: Option<ToolsToml>,
|
2025-08-28 12:54:12 -07:00
|
|
|
|
2025-10-14 18:50:00 +01:00
|
|
|
/// Centralized feature flags (new). Prefer this over individual toggles.
|
|
|
|
|
#[serde(default)]
|
|
|
|
|
pub features: Option<FeaturesToml>,
|
|
|
|
|
|
2025-08-28 12:54:12 -07:00
|
|
|
/// When true, disables burst-paste detection for typed input entirely.
|
|
|
|
|
/// All characters are inserted as they are received, and no buffering
|
|
|
|
|
/// or placeholder replacement will occur for fast keypress bursts.
|
|
|
|
|
pub disable_paste_burst: Option<bool>,
|
OpenTelemetry events (#2103)
### Title
## otel
Codex can emit [OpenTelemetry](https://opentelemetry.io/) **log events**
that
describe each run: outbound API requests, streamed responses, user
input,
tool-approval decisions, and the result of every tool invocation. Export
is
**disabled by default** so local runs remain self-contained. Opt in by
adding an
`[otel]` table and choosing an exporter.
```toml
[otel]
environment = "staging" # defaults to "dev"
exporter = "none" # defaults to "none"; set to otlp-http or otlp-grpc to send events
log_user_prompt = false # defaults to false; redact prompt text unless explicitly enabled
```
Codex tags every exported event with `service.name = "codex-cli"`, the
CLI
version, and an `env` attribute so downstream collectors can distinguish
dev/staging/prod traffic. Only telemetry produced inside the
`codex_otel`
crate—the events listed below—is forwarded to the exporter.
### Event catalog
Every event shares a common set of metadata fields: `event.timestamp`,
`conversation.id`, `app.version`, `auth_mode` (when available),
`user.account_id` (when available), `terminal.type`, `model`, and
`slug`.
With OTEL enabled Codex emits the following event types (in addition to
the
metadata above):
- `codex.api_request`
- `cf_ray` (optional)
- `attempt`
- `duration_ms`
- `http.response.status_code` (optional)
- `error.message` (failures)
- `codex.sse_event`
- `event.kind`
- `duration_ms`
- `error.message` (failures)
- `input_token_count` (completion only)
- `output_token_count` (completion only)
- `cached_token_count` (completion only, optional)
- `reasoning_token_count` (completion only, optional)
- `tool_token_count` (completion only)
- `codex.user_prompt`
- `prompt_length`
- `prompt` (redacted unless `log_user_prompt = true`)
- `codex.tool_decision`
- `tool_name`
- `call_id`
- `decision` (`approved`, `approved_for_session`, `denied`, or `abort`)
- `source` (`config` or `user`)
- `codex.tool_result`
- `tool_name`
- `call_id`
- `arguments`
- `duration_ms` (execution time for the tool)
- `success` (`"true"` or `"false"`)
- `output`
### Choosing an exporter
Set `otel.exporter` to control where events go:
- `none` – leaves instrumentation active but skips exporting. This is
the
default.
- `otlp-http` – posts OTLP log records to an OTLP/HTTP collector.
Specify the
endpoint, protocol, and headers your collector expects:
```toml
[otel]
exporter = { otlp-http = {
endpoint = "https://otel.example.com/v1/logs",
protocol = "binary",
headers = { "x-otlp-api-key" = "${OTLP_TOKEN}" }
}}
```
- `otlp-grpc` – streams OTLP log records over gRPC. Provide the endpoint
and any
metadata headers:
```toml
[otel]
exporter = { otlp-grpc = {
endpoint = "https://otel.example.com:4317",
headers = { "x-otlp-meta" = "abc123" }
}}
```
If the exporter is `none` nothing is written anywhere; otherwise you
must run or point to your
own collector. All exporters run on a background batch worker that is
flushed on
shutdown.
If you build Codex from source the OTEL crate is still behind an `otel`
feature
flag; the official prebuilt binaries ship with the feature enabled. When
the
feature is disabled the telemetry hooks become no-ops so the CLI
continues to
function without the extra dependencies.
---------
Co-authored-by: Anton Panasenko <apanasenko@openai.com>
2025-09-29 19:30:55 +01:00
|
|
|
|
|
|
|
|
/// OTEL configuration.
|
2025-10-30 10:28:32 +00:00
|
|
|
pub otel: Option<crate::config::types::OtelConfigToml>,
|
2025-10-04 17:41:40 -07:00
|
|
|
|
|
|
|
|
/// Tracks whether the Windows onboarding screen has been acknowledged.
|
|
|
|
|
pub windows_wsl_setup_acknowledged: Option<bool>,
|
2025-10-14 18:50:00 +01:00
|
|
|
|
2025-10-16 17:31:46 -07:00
|
|
|
/// Collection of in-product notices (different from notifications)
|
2025-10-30 10:28:32 +00:00
|
|
|
/// See [`crate::config::types::Notice`] for more details
|
2025-10-16 17:31:46 -07:00
|
|
|
pub notice: Option<Notice>,
|
|
|
|
|
|
2025-10-14 18:50:00 +01:00
|
|
|
/// Legacy, now use features
|
|
|
|
|
pub experimental_instructions_file: Option<PathBuf>,
|
|
|
|
|
pub experimental_use_exec_command_tool: Option<bool>,
|
|
|
|
|
pub experimental_use_unified_exec_tool: Option<bool>,
|
|
|
|
|
pub experimental_use_rmcp_client: Option<bool>,
|
|
|
|
|
pub experimental_use_freeform_apply_patch: Option<bool>,
|
2025-10-24 17:23:44 -05:00
|
|
|
pub experimental_sandbox_command_assessment: Option<bool>,
|
2025-08-07 09:27:38 -07:00
|
|
|
}
|
|
|
|
|
|
2025-09-04 16:26:41 -07:00
|
|
|
impl From<ConfigToml> for UserSavedConfig {
    /// Projects the raw, fully-optional `ConfigToml` down to the subset of
    /// user-visible settings captured by `UserSavedConfig`.
    ///
    /// Every field is carried over verbatim; nested TOML shapes
    /// (`sandbox_workspace_write`, `tools`, each profile) are converted via
    /// their own `From` impls.
    fn from(config_toml: ConfigToml) -> Self {
        Self {
            approval_policy: config_toml.approval_policy,
            sandbox_mode: config_toml.sandbox_mode,
            sandbox_settings: config_toml.sandbox_workspace_write.map(From::from),
            forced_chatgpt_workspace_id: config_toml.forced_chatgpt_workspace_id,
            forced_login_method: config_toml.forced_login_method,
            model: config_toml.model,
            model_reasoning_effort: config_toml.model_reasoning_effort,
            model_reasoning_summary: config_toml.model_reasoning_summary,
            model_verbosity: config_toml.model_verbosity,
            tools: config_toml.tools.map(From::from),
            profile: config_toml.profile,
            // Convert each named profile with its own `From` impl, keeping keys.
            profiles: config_toml
                .profiles
                .into_iter()
                .map(|(name, profile_toml)| (name, profile_toml.into()))
                .collect(),
        }
    }
}
|
|
|
|
|
|
2025-08-07 09:27:38 -07:00
|
|
|
/// Per-project settings recorded under `[projects."<path>"]` in `config.toml`.
#[derive(Deserialize, Debug, Clone, PartialEq, Eq)]
pub struct ProjectConfig {
    /// Trust level recorded for this project directory.
    ///
    /// Only the literal value `"trusted"` is meaningful (see
    /// `ProjectConfig::is_trusted`); any other value, or `None`, is treated
    /// as untrusted.
    pub trust_level: Option<String>,
}
|
|
|
|
|
|
2025-10-16 11:23:38 -07:00
|
|
|
impl ProjectConfig {
|
|
|
|
|
pub fn is_trusted(&self) -> bool {
|
|
|
|
|
match &self.trust_level {
|
|
|
|
|
Some(trust_level) => trust_level == "trusted",
|
|
|
|
|
None => false,
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2025-09-11 23:44:17 -07:00
|
|
|
/// Nested `[tools]` section of `config.toml` with per-tool feature toggles.
#[derive(Deserialize, Debug, Clone, Default, PartialEq)]
pub struct ToolsToml {
    /// Enable the web-search tool. `web_search_request` is accepted as a
    /// legacy alias for this key.
    #[serde(default, alias = "web_search_request")]
    pub web_search: Option<bool>,

    /// Enable the `view_image` tool that lets the agent attach local images.
    #[serde(default)]
    pub view_image: Option<bool>,
}
|
|
|
|
|
|
2025-09-04 16:26:41 -07:00
|
|
|
impl From<ToolsToml> for Tools {
|
|
|
|
|
fn from(tools_toml: ToolsToml) -> Self {
|
|
|
|
|
Self {
|
|
|
|
|
web_search: tools_toml.web_search,
|
|
|
|
|
view_image: tools_toml.view_image,
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2025-10-27 18:19:32 -07:00
|
|
|
/// Result of resolving the effective sandbox policy from configuration,
/// profile, and CLI overrides (see `ConfigToml::derive_sandbox_policy`).
#[derive(Debug, PartialEq, Eq)]
pub struct SandboxPolicyResolution {
    /// The sandbox policy that should take effect for this session.
    pub policy: SandboxPolicy,
    /// NOTE(review): presumably set when a requested auto/workspace-write
    /// mode had to be downgraded on Windows — the code that sets this flag
    /// is outside this view; confirm against the resolution logic.
    pub forced_auto_mode_downgraded_on_windows: bool,
}
|
|
|
|
|
|
feat: add support for --sandbox flag (#1476)
On a high-level, we try to design `config.toml` so that you don't have
to "comment out a lot of stuff" when testing different options.
Previously, defining a sandbox policy was somewhat at odds with this
principle because you would define the policy as attributes of
`[sandbox]` like so:
```toml
[sandbox]
mode = "workspace-write"
writable_roots = [ "/tmp" ]
```
but if you wanted to temporarily change to a read-only sandbox, you
might feel compelled to modify your file to be:
```toml
[sandbox]
mode = "read-only"
# mode = "workspace-write"
# writable_roots = [ "/tmp" ]
```
Technically, commenting out `writable_roots` would not be strictly
necessary, as `mode = "read-only"` would ignore `writable_roots`, but
it's still a reasonable thing to do to keep things tidy.
Currently, the various values for `mode` do not support that many
attributes, so this is not that hard to maintain, but one could imagine
this becoming more complex in the future.
In this PR, we change Codex CLI so that it no longer recognizes
`[sandbox]`. Instead, it introduces a top-level option, `sandbox_mode`,
and `[sandbox_workspace_write]` is used to further configure the sandbox
when `sandbox_mode = "workspace-write"` is used:
```toml
sandbox_mode = "workspace-write"
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
```
This feels a bit more future-proof in that it is less tedious to
configure different sandboxes:
```toml
sandbox_mode = "workspace-write"
[sandbox_read_only]
# read-only options here...
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
[sandbox_danger_full_access]
# danger-full-access options here...
```
In this scheme, you never need to comment out the configuration for an
individual sandbox type: you only need to redefine `sandbox_mode`.
Relatedly, previous to this change, a user had to do `-c
sandbox.mode=read-only` to change the mode on the command line. With
this change, things are arguably a bit cleaner because the equivalent
option is `-c sandbox_mode=read-only` (and now `-c
sandbox_workspace_write=...` can be set separately).
Though more importantly, we introduce the `-s/--sandbox` option to the
CLI, which maps directly to `sandbox_mode` in `config.toml`, making
config override behavior easier to reason about. Moreover, as you can
see in the updates to the various Markdown files, it is much easier to
explain how to configure sandboxing when things like `--sandbox
read-only` can be used as an example.
Relatedly, this cleanup also made it straightforward to add support for
a `sandbox` option for Codex when used as an MCP server (see the changes
to `mcp-server/src/codex_tool_config.rs`).
Fixes https://github.com/openai/codex/issues/1248.
2025-07-07 22:31:30 -07:00
|
|
|
impl ConfigToml {
|
|
|
|
|
/// Derive the effective sandbox policy from the configuration.
|
2025-10-16 11:23:38 -07:00
|
|
|
fn derive_sandbox_policy(
|
|
|
|
|
&self,
|
|
|
|
|
sandbox_mode_override: Option<SandboxMode>,
|
2025-10-25 18:52:26 -05:00
|
|
|
profile_sandbox_mode: Option<SandboxMode>,
|
2025-10-16 11:23:38 -07:00
|
|
|
resolved_cwd: &Path,
|
2025-10-27 18:19:32 -07:00
|
|
|
) -> SandboxPolicyResolution {
|
feat: add support for --sandbox flag (#1476)
On a high-level, we try to design `config.toml` so that you don't have
to "comment out a lot of stuff" when testing different options.
Previously, defining a sandbox policy was somewhat at odds with this
principle because you would define the policy as attributes of
`[sandbox]` like so:
```toml
[sandbox]
mode = "workspace-write"
writable_roots = [ "/tmp" ]
```
but if you wanted to temporarily change to a read-only sandbox, you
might feel compelled to modify your file to be:
```toml
[sandbox]
mode = "read-only"
# mode = "workspace-write"
# writable_roots = [ "/tmp" ]
```
Technically, commenting out `writable_roots` would not be strictly
necessary, as `mode = "read-only"` would ignore `writable_roots`, but
it's still a reasonable thing to do to keep things tidy.
Currently, the various values for `mode` do not support that many
attributes, so this is not that hard to maintain, but one could imagine
this becoming more complex in the future.
In this PR, we change Codex CLI so that it no longer recognizes
`[sandbox]`. Instead, it introduces a top-level option, `sandbox_mode`,
and `[sandbox_workspace_write]` is used to further configure the sandbox
when when `sandbox_mode = "workspace-write"` is used:
```toml
sandbox_mode = "workspace-write"
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
```
This feels a bit more future-proof in that it is less tedious to
configure different sandboxes:
```toml
sandbox_mode = "workspace-write"
[sandbox_read_only]
# read-only options here...
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
[sandbox_danger_full_access]
# danger-full-access options here...
```
In this scheme, you never need to comment out the configuration for an
individual sandbox type: you only need to redefine `sandbox_mode`.
Relatedly, previous to this change, a user had to do `-c
sandbox.mode=read-only` to change the mode on the command line. With
this change, things are arguably a bit cleaner because the equivalent
option is `-c sandbox_mode=read-only` (and now `-c
sandbox_workspace_write=...` can be set separately).
Though more importantly, we introduce the `-s/--sandbox` option to the
CLI, which maps directly to `sandbox_mode` in `config.toml`, making
config override behavior easier to reason about. Moreover, as you can
see in the updates to the various Markdown files, it is much easier to
explain how to configure sandboxing when things like `--sandbox
read-only` can be used as an example.
Relatedly, this cleanup also made it straightforward to add support for
a `sandbox` option for Codex when used as an MCP server (see the changes
to `mcp-server/src/codex_tool_config.rs`).
Fixes https://github.com/openai/codex/issues/1248.
2025-07-07 22:31:30 -07:00
|
|
|
let resolved_sandbox_mode = sandbox_mode_override
|
2025-10-25 18:52:26 -05:00
|
|
|
.or(profile_sandbox_mode)
|
feat: add support for --sandbox flag (#1476)
On a high-level, we try to design `config.toml` so that you don't have
to "comment out a lot of stuff" when testing different options.
Previously, defining a sandbox policy was somewhat at odds with this
principle because you would define the policy as attributes of
`[sandbox]` like so:
```toml
[sandbox]
mode = "workspace-write"
writable_roots = [ "/tmp" ]
```
but if you wanted to temporarily change to a read-only sandbox, you
might feel compelled to modify your file to be:
```toml
[sandbox]
mode = "read-only"
# mode = "workspace-write"
# writable_roots = [ "/tmp" ]
```
Technically, commenting out `writable_roots` would not be strictly
necessary, as `mode = "read-only"` would ignore `writable_roots`, but
it's still a reasonable thing to do to keep things tidy.
Currently, the various values for `mode` do not support that many
attributes, so this is not that hard to maintain, but one could imagine
this becoming more complex in the future.
In this PR, we change Codex CLI so that it no longer recognizes
`[sandbox]`. Instead, it introduces a top-level option, `sandbox_mode`,
and `[sandbox_workspace_write]` is used to further configure the sandbox
when when `sandbox_mode = "workspace-write"` is used:
```toml
sandbox_mode = "workspace-write"
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
```
This feels a bit more future-proof in that it is less tedious to
configure different sandboxes:
```toml
sandbox_mode = "workspace-write"
[sandbox_read_only]
# read-only options here...
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
[sandbox_danger_full_access]
# danger-full-access options here...
```
In this scheme, you never need to comment out the configuration for an
individual sandbox type: you only need to redefine `sandbox_mode`.
Relatedly, previous to this change, a user had to do `-c
sandbox.mode=read-only` to change the mode on the command line. With
this change, things are arguably a bit cleaner because the equivalent
option is `-c sandbox_mode=read-only` (and now `-c
sandbox_workspace_write=...` can be set separately).
Though more importantly, we introduce the `-s/--sandbox` option to the
CLI, which maps directly to `sandbox_mode` in `config.toml`, making
config override behavior easier to reason about. Moreover, as you can
see in the updates to the various Markdown files, it is much easier to
explain how to configure sandboxing when things like `--sandbox
read-only` can be used as an example.
Relatedly, this cleanup also made it straightforward to add support for
a `sandbox` option for Codex when used as an MCP server (see the changes
to `mcp-server/src/codex_tool_config.rs`).
Fixes https://github.com/openai/codex/issues/1248.
2025-07-07 22:31:30 -07:00
|
|
|
.or(self.sandbox_mode)
|
2025-10-16 11:23:38 -07:00
|
|
|
.or_else(|| {
|
|
|
|
|
// if no sandbox_mode is set, but user has marked directory as trusted, use WorkspaceWrite
|
|
|
|
|
self.get_active_project(resolved_cwd).and_then(|p| {
|
|
|
|
|
if p.is_trusted() {
|
|
|
|
|
Some(SandboxMode::WorkspaceWrite)
|
|
|
|
|
} else {
|
|
|
|
|
None
|
|
|
|
|
}
|
|
|
|
|
})
|
|
|
|
|
})
|
feat: add support for --sandbox flag (#1476)
On a high-level, we try to design `config.toml` so that you don't have
to "comment out a lot of stuff" when testing different options.
Previously, defining a sandbox policy was somewhat at odds with this
principle because you would define the policy as attributes of
`[sandbox]` like so:
```toml
[sandbox]
mode = "workspace-write"
writable_roots = [ "/tmp" ]
```
but if you wanted to temporarily change to a read-only sandbox, you
might feel compelled to modify your file to be:
```toml
[sandbox]
mode = "read-only"
# mode = "workspace-write"
# writable_roots = [ "/tmp" ]
```
Technically, commenting out `writable_roots` would not be strictly
necessary, as `mode = "read-only"` would ignore `writable_roots`, but
it's still a reasonable thing to do to keep things tidy.
Currently, the various values for `mode` do not support that many
attributes, so this is not that hard to maintain, but one could imagine
this becoming more complex in the future.
In this PR, we change Codex CLI so that it no longer recognizes
`[sandbox]`. Instead, it introduces a top-level option, `sandbox_mode`,
and `[sandbox_workspace_write]` is used to further configure the sandbox
when when `sandbox_mode = "workspace-write"` is used:
```toml
sandbox_mode = "workspace-write"
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
```
This feels a bit more future-proof in that it is less tedious to
configure different sandboxes:
```toml
sandbox_mode = "workspace-write"
[sandbox_read_only]
# read-only options here...
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
[sandbox_danger_full_access]
# danger-full-access options here...
```
In this scheme, you never need to comment out the configuration for an
individual sandbox type: you only need to redefine `sandbox_mode`.
Relatedly, previous to this change, a user had to do `-c
sandbox.mode=read-only` to change the mode on the command line. With
this change, things are arguably a bit cleaner because the equivalent
option is `-c sandbox_mode=read-only` (and now `-c
sandbox_workspace_write=...` can be set separately).
Though more importantly, we introduce the `-s/--sandbox` option to the
CLI, which maps directly to `sandbox_mode` in `config.toml`, making
config override behavior easier to reason about. Moreover, as you can
see in the updates to the various Markdown files, it is much easier to
explain how to configure sandboxing when things like `--sandbox
read-only` can be used as an example.
Relatedly, this cleanup also made it straightforward to add support for
a `sandbox` option for Codex when used as an MCP server (see the changes
to `mcp-server/src/codex_tool_config.rs`).
Fixes https://github.com/openai/codex/issues/1248.
2025-07-07 22:31:30 -07:00
|
|
|
.unwrap_or_default();
|
2025-10-27 18:19:32 -07:00
|
|
|
let mut sandbox_policy = match resolved_sandbox_mode {
|
feat: add support for --sandbox flag (#1476)
On a high-level, we try to design `config.toml` so that you don't have
to "comment out a lot of stuff" when testing different options.
Previously, defining a sandbox policy was somewhat at odds with this
principle because you would define the policy as attributes of
`[sandbox]` like so:
```toml
[sandbox]
mode = "workspace-write"
writable_roots = [ "/tmp" ]
```
but if you wanted to temporarily change to a read-only sandbox, you
might feel compelled to modify your file to be:
```toml
[sandbox]
mode = "read-only"
# mode = "workspace-write"
# writable_roots = [ "/tmp" ]
```
Technically, commenting out `writable_roots` would not be strictly
necessary, as `mode = "read-only"` would ignore `writable_roots`, but
it's still a reasonable thing to do to keep things tidy.
Currently, the various values for `mode` do not support that many
attributes, so this is not that hard to maintain, but one could imagine
this becoming more complex in the future.
In this PR, we change Codex CLI so that it no longer recognizes
`[sandbox]`. Instead, it introduces a top-level option, `sandbox_mode`,
and `[sandbox_workspace_write]` is used to further configure the sandbox
when `sandbox_mode = "workspace-write"` is used:
```toml
sandbox_mode = "workspace-write"
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
```
This feels a bit more future-proof in that it is less tedious to
configure different sandboxes:
```toml
sandbox_mode = "workspace-write"
[sandbox_read_only]
# read-only options here...
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
[sandbox_danger_full_access]
# danger-full-access options here...
```
In this scheme, you never need to comment out the configuration for an
individual sandbox type: you only need to redefine `sandbox_mode`.
Relatedly, previous to this change, a user had to do `-c
sandbox.mode=read-only` to change the mode on the command line. With
this change, things are arguably a bit cleaner because the equivalent
option is `-c sandbox_mode=read-only` (and now `-c
sandbox_workspace_write=...` can be set separately).
Though more importantly, we introduce the `-s/--sandbox` option to the
CLI, which maps directly to `sandbox_mode` in `config.toml`, making
config override behavior easier to reason about. Moreover, as you can
see in the updates to the various Markdown files, it is much easier to
explain how to configure sandboxing when things like `--sandbox
read-only` can be used as an example.
Relatedly, this cleanup also made it straightforward to add support for
a `sandbox` option for Codex when used as an MCP server (see the changes
to `mcp-server/src/codex_tool_config.rs`).
Fixes https://github.com/openai/codex/issues/1248.
2025-07-07 22:31:30 -07:00
|
|
|
SandboxMode::ReadOnly => SandboxPolicy::new_read_only_policy(),
|
|
|
|
|
SandboxMode::WorkspaceWrite => match self.sandbox_workspace_write.as_ref() {
|
2025-08-07 01:30:13 -07:00
|
|
|
Some(SandboxWorkspaceWrite {
|
2025-08-07 00:17:00 -07:00
|
|
|
writable_roots,
|
|
|
|
|
network_access,
|
|
|
|
|
exclude_tmpdir_env_var,
|
|
|
|
|
exclude_slash_tmp,
|
|
|
|
|
}) => SandboxPolicy::WorkspaceWrite {
|
|
|
|
|
writable_roots: writable_roots.clone(),
|
|
|
|
|
network_access: *network_access,
|
|
|
|
|
exclude_tmpdir_env_var: *exclude_tmpdir_env_var,
|
|
|
|
|
exclude_slash_tmp: *exclude_slash_tmp,
|
feat: add support for --sandbox flag (#1476)
On a high-level, we try to design `config.toml` so that you don't have
to "comment out a lot of stuff" when testing different options.
Previously, defining a sandbox policy was somewhat at odds with this
principle because you would define the policy as attributes of
`[sandbox]` like so:
```toml
[sandbox]
mode = "workspace-write"
writable_roots = [ "/tmp" ]
```
but if you wanted to temporarily change to a read-only sandbox, you
might feel compelled to modify your file to be:
```toml
[sandbox]
mode = "read-only"
# mode = "workspace-write"
# writable_roots = [ "/tmp" ]
```
Technically, commenting out `writable_roots` would not be strictly
necessary, as `mode = "read-only"` would ignore `writable_roots`, but
it's still a reasonable thing to do to keep things tidy.
Currently, the various values for `mode` do not support that many
attributes, so this is not that hard to maintain, but one could imagine
this becoming more complex in the future.
In this PR, we change Codex CLI so that it no longer recognizes
`[sandbox]`. Instead, it introduces a top-level option, `sandbox_mode`,
and `[sandbox_workspace_write]` is used to further configure the sandbox
when `sandbox_mode = "workspace-write"` is used:
```toml
sandbox_mode = "workspace-write"
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
```
This feels a bit more future-proof in that it is less tedious to
configure different sandboxes:
```toml
sandbox_mode = "workspace-write"
[sandbox_read_only]
# read-only options here...
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
[sandbox_danger_full_access]
# danger-full-access options here...
```
In this scheme, you never need to comment out the configuration for an
individual sandbox type: you only need to redefine `sandbox_mode`.
Relatedly, previous to this change, a user had to do `-c
sandbox.mode=read-only` to change the mode on the command line. With
this change, things are arguably a bit cleaner because the equivalent
option is `-c sandbox_mode=read-only` (and now `-c
sandbox_workspace_write=...` can be set separately).
Though more importantly, we introduce the `-s/--sandbox` option to the
CLI, which maps directly to `sandbox_mode` in `config.toml`, making
config override behavior easier to reason about. Moreover, as you can
see in the updates to the various Markdown files, it is much easier to
explain how to configure sandboxing when things like `--sandbox
read-only` can be used as an example.
Relatedly, this cleanup also made it straightforward to add support for
a `sandbox` option for Codex when used as an MCP server (see the changes
to `mcp-server/src/codex_tool_config.rs`).
Fixes https://github.com/openai/codex/issues/1248.
2025-07-07 22:31:30 -07:00
|
|
|
},
|
|
|
|
|
None => SandboxPolicy::new_workspace_write_policy(),
|
|
|
|
|
},
|
|
|
|
|
SandboxMode::DangerFullAccess => SandboxPolicy::DangerFullAccess,
|
2025-10-27 18:19:32 -07:00
|
|
|
};
|
|
|
|
|
let mut forced_auto_mode_downgraded_on_windows = false;
|
|
|
|
|
if cfg!(target_os = "windows")
|
|
|
|
|
&& matches!(resolved_sandbox_mode, SandboxMode::WorkspaceWrite)
|
|
|
|
|
{
|
|
|
|
|
sandbox_policy = SandboxPolicy::new_read_only_policy();
|
|
|
|
|
forced_auto_mode_downgraded_on_windows = true;
|
|
|
|
|
}
|
|
|
|
|
SandboxPolicyResolution {
|
|
|
|
|
policy: sandbox_policy,
|
|
|
|
|
forced_auto_mode_downgraded_on_windows,
|
feat: add support for --sandbox flag (#1476)
On a high-level, we try to design `config.toml` so that you don't have
to "comment out a lot of stuff" when testing different options.
Previously, defining a sandbox policy was somewhat at odds with this
principle because you would define the policy as attributes of
`[sandbox]` like so:
```toml
[sandbox]
mode = "workspace-write"
writable_roots = [ "/tmp" ]
```
but if you wanted to temporarily change to a read-only sandbox, you
might feel compelled to modify your file to be:
```toml
[sandbox]
mode = "read-only"
# mode = "workspace-write"
# writable_roots = [ "/tmp" ]
```
Technically, commenting out `writable_roots` would not be strictly
necessary, as `mode = "read-only"` would ignore `writable_roots`, but
it's still a reasonable thing to do to keep things tidy.
Currently, the various values for `mode` do not support that many
attributes, so this is not that hard to maintain, but one could imagine
this becoming more complex in the future.
In this PR, we change Codex CLI so that it no longer recognizes
`[sandbox]`. Instead, it introduces a top-level option, `sandbox_mode`,
and `[sandbox_workspace_write]` is used to further configure the sandbox
when `sandbox_mode = "workspace-write"` is used:
```toml
sandbox_mode = "workspace-write"
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
```
This feels a bit more future-proof in that it is less tedious to
configure different sandboxes:
```toml
sandbox_mode = "workspace-write"
[sandbox_read_only]
# read-only options here...
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
[sandbox_danger_full_access]
# danger-full-access options here...
```
In this scheme, you never need to comment out the configuration for an
individual sandbox type: you only need to redefine `sandbox_mode`.
Relatedly, previous to this change, a user had to do `-c
sandbox.mode=read-only` to change the mode on the command line. With
this change, things are arguably a bit cleaner because the equivalent
option is `-c sandbox_mode=read-only` (and now `-c
sandbox_workspace_write=...` can be set separately).
Though more importantly, we introduce the `-s/--sandbox` option to the
CLI, which maps directly to `sandbox_mode` in `config.toml`, making
config override behavior easier to reason about. Moreover, as you can
see in the updates to the various Markdown files, it is much easier to
explain how to configure sandboxing when things like `--sandbox
read-only` can be used as an example.
Relatedly, this cleanup also made it straightforward to add support for
a `sandbox` option for Codex when used as an MCP server (see the changes
to `mcp-server/src/codex_tool_config.rs`).
Fixes https://github.com/openai/codex/issues/1248.
2025-07-07 22:31:30 -07:00
|
|
|
}
|
|
|
|
|
}
|
2025-08-07 09:27:38 -07:00
|
|
|
|
2025-10-16 11:23:38 -07:00
|
|
|
/// Resolves the cwd to an existing project, or returns None if ConfigToml
|
|
|
|
|
/// does not contain a project corresponding to cwd or a git repo for cwd
|
|
|
|
|
pub fn get_active_project(&self, resolved_cwd: &Path) -> Option<ProjectConfig> {
|
2025-08-07 09:27:38 -07:00
|
|
|
let projects = self.projects.clone().unwrap_or_default();
|
|
|
|
|
|
2025-10-16 11:23:38 -07:00
|
|
|
if let Some(project_config) = projects.get(&resolved_cwd.to_string_lossy().to_string()) {
|
|
|
|
|
return Some(project_config.clone());
|
2025-08-22 13:54:51 -07:00
|
|
|
}
|
|
|
|
|
|
2025-10-16 11:23:38 -07:00
|
|
|
// If cwd lives inside a git repo/worktree, check whether the root git project
|
2025-08-22 13:54:51 -07:00
|
|
|
// (the primary repository working directory) is trusted. This lets
|
|
|
|
|
// worktrees inherit trust from the main project.
|
2025-10-16 11:23:38 -07:00
|
|
|
if let Some(repo_root) = resolve_root_git_project_for_trust(resolved_cwd)
|
|
|
|
|
&& let Some(project_config_for_root) =
|
|
|
|
|
projects.get(&repo_root.to_string_lossy().to_string_lossy().to_string())
|
|
|
|
|
{
|
|
|
|
|
return Some(project_config_for_root.clone());
|
2025-08-22 13:54:51 -07:00
|
|
|
}
|
|
|
|
|
|
2025-10-16 11:23:38 -07:00
|
|
|
None
|
2025-08-07 09:27:38 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn get_config_profile(
|
|
|
|
|
&self,
|
|
|
|
|
override_profile: Option<String>,
|
|
|
|
|
) -> Result<ConfigProfile, std::io::Error> {
|
|
|
|
|
let profile = override_profile.or_else(|| self.profile.clone());
|
|
|
|
|
|
|
|
|
|
match profile {
|
|
|
|
|
Some(key) => {
|
|
|
|
|
if let Some(profile) = self.profiles.get(key.as_str()) {
|
|
|
|
|
return Ok(profile.clone());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Err(std::io::Error::new(
|
|
|
|
|
std::io::ErrorKind::NotFound,
|
|
|
|
|
format!("config profile `{key}` not found"),
|
|
|
|
|
))
|
|
|
|
|
}
|
|
|
|
|
None => Ok(ConfigProfile::default()),
|
|
|
|
|
}
|
|
|
|
|
}
|
feat: add support for --sandbox flag (#1476)
On a high-level, we try to design `config.toml` so that you don't have
to "comment out a lot of stuff" when testing different options.
Previously, defining a sandbox policy was somewhat at odds with this
principle because you would define the policy as attributes of
`[sandbox]` like so:
```toml
[sandbox]
mode = "workspace-write"
writable_roots = [ "/tmp" ]
```
but if you wanted to temporarily change to a read-only sandbox, you
might feel compelled to modify your file to be:
```toml
[sandbox]
mode = "read-only"
# mode = "workspace-write"
# writable_roots = [ "/tmp" ]
```
Technically, commenting out `writable_roots` would not be strictly
necessary, as `mode = "read-only"` would ignore `writable_roots`, but
it's still a reasonable thing to do to keep things tidy.
Currently, the various values for `mode` do not support that many
attributes, so this is not that hard to maintain, but one could imagine
this becoming more complex in the future.
In this PR, we change Codex CLI so that it no longer recognizes
`[sandbox]`. Instead, it introduces a top-level option, `sandbox_mode`,
and `[sandbox_workspace_write]` is used to further configure the sandbox
when `sandbox_mode = "workspace-write"` is used:
```toml
sandbox_mode = "workspace-write"
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
```
This feels a bit more future-proof in that it is less tedious to
configure different sandboxes:
```toml
sandbox_mode = "workspace-write"
[sandbox_read_only]
# read-only options here...
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
[sandbox_danger_full_access]
# danger-full-access options here...
```
In this scheme, you never need to comment out the configuration for an
individual sandbox type: you only need to redefine `sandbox_mode`.
Relatedly, previous to this change, a user had to do `-c
sandbox.mode=read-only` to change the mode on the command line. With
this change, things are arguably a bit cleaner because the equivalent
option is `-c sandbox_mode=read-only` (and now `-c
sandbox_workspace_write=...` can be set separately).
Though more importantly, we introduce the `-s/--sandbox` option to the
CLI, which maps directly to `sandbox_mode` in `config.toml`, making
config override behavior easier to reason about. Moreover, as you can
see in the updates to the various Markdown files, it is much easier to
explain how to configure sandboxing when things like `--sandbox
read-only` can be used as an example.
Relatedly, this cleanup also made it straightforward to add support for
a `sandbox` option for Codex when used as an MCP server (see the changes
to `mcp-server/src/codex_tool_config.rs`).
Fixes https://github.com/openai/codex/issues/1248.
2025-07-07 22:31:30 -07:00
|
|
|
}
|
|
|
|
|
|
2025-04-27 21:47:50 -07:00
|
|
|
/// Optional overrides for user configuration (e.g., from CLI flags).
///
/// Every field is optional (or defaults to empty); a `None` means "no
/// override — use the value resolved from the config file / profile".
#[derive(Default, Debug, Clone)]
pub struct ConfigOverrides {
    /// Override for the primary model slug.
    pub model: Option<String>,
    /// Override for the model used by review mode.
    pub review_model: Option<String>,
    /// Working directory for the session; resolved against the process cwd
    /// when relative.
    pub cwd: Option<PathBuf>,
    /// Override for the approval policy.
    pub approval_policy: Option<AskForApproval>,
    /// Override for the sandbox mode (maps to the `-s/--sandbox` CLI flag
    /// and the top-level `sandbox_mode` config key).
    pub sandbox_mode: Option<SandboxMode>,
    /// Override for the model provider key.
    pub model_provider: Option<String>,
    /// Config profile to activate (maps to the `--profile` CLI flag).
    pub config_profile: Option<String>,
    /// Path to the Linux sandbox helper executable, when applicable.
    pub codex_linux_sandbox_exe: Option<PathBuf>,
    /// Override for the base instructions sent to the model.
    pub base_instructions: Option<String>,
    /// Force-enable/disable the `apply_patch` tool.
    pub include_apply_patch_tool: Option<bool>,
    /// Force-enable/disable the `view_image` tool.
    pub include_view_image_tool: Option<bool>,
    /// Force showing raw agent reasoning output.
    pub show_raw_agent_reasoning: Option<bool>,
    /// Force-enable/disable the web-search tool request feature.
    pub tools_web_search_request: Option<bool>,
    /// Experimental: override for sandbox command assessment.
    pub experimental_sandbox_command_assessment: Option<bool>,
    /// Additional directories that should be treated as writable roots for this session.
    pub additional_writable_roots: Vec<PathBuf>,
}
|
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
As stated in `codex-rs/README.md`:
Today, Codex CLI is written in TypeScript and requires Node.js 22+ to
run it. For a number of users, this runtime requirement inhibits
adoption: they would be better served by a standalone executable. As
maintainers, we want Codex to run efficiently in a wide range of
environments with minimal overhead. We also want to take advantage of
operating system-specific APIs to provide better sandboxing, where
possible.
To that end, we are moving forward with a Rust implementation of Codex
CLI contained in this folder, which has the following benefits:
- The CLI compiles to small, standalone, platform-specific binaries.
- Can make direct, native calls to
[seccomp](https://man7.org/linux/man-pages/man2/seccomp.2.html) and
[landlock](https://man7.org/linux/man-pages/man7/landlock.7.html) in
order to support sandboxing on Linux.
- No runtime garbage collection, resulting in lower memory consumption
and better, more predictable performance.
Currently, the Rust implementation is materially behind the TypeScript
implementation in functionality, so continue to use the TypeScript
implementation for the time being. We will publish native executables via
GitHub Releases as soon as we feel the Rust version is usable.
2025-04-24 13:31:40 -07:00
|
|
|
|
2025-04-27 21:47:50 -07:00
|
|
|
impl Config {
|
2025-05-15 00:30:13 -07:00
|
|
|
/// Meant to be used exclusively for tests: `load_with_overrides()` should
|
|
|
|
|
/// be used in all other cases.
|
|
|
|
|
pub fn load_from_base_config_with_overrides(
|
2025-05-07 17:38:28 -07:00
|
|
|
cfg: ConfigToml,
|
|
|
|
|
overrides: ConfigOverrides,
|
2025-05-15 00:30:13 -07:00
|
|
|
codex_home: PathBuf,
|
2025-05-07 17:38:28 -07:00
|
|
|
) -> std::io::Result<Self> {
|
2025-07-22 09:42:22 -07:00
|
|
|
let user_instructions = Self::load_instructions(Some(&codex_home));
|
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
As stated in `codex-rs/README.md`:
Today, Codex CLI is written in TypeScript and requires Node.js 22+ to
run it. For a number of users, this runtime requirement inhibits
adoption: they would be better served by a standalone executable. As
maintainers, we want Codex to run efficiently in a wide range of
environments with minimal overhead. We also want to take advantage of
operating system-specific APIs to provide better sandboxing, where
possible.
To that end, we are moving forward with a Rust implementation of Codex
CLI contained in this folder, which has the following benefits:
- The CLI compiles to small, standalone, platform-specific binaries.
- Can make direct, native calls to
[seccomp](https://man7.org/linux/man-pages/man2/seccomp.2.html) and
[landlock](https://man7.org/linux/man-pages/man7/landlock.7.html) in
order to support sandboxing on Linux.
- No runtime garbage collection, resulting in lower memory consumption
and better, more predictable performance.
Currently, the Rust implementation is materially behind the TypeScript
implementation in functionality, so continue to use the TypeScript
implementation for the time being. We will publish native executables via
GitHub Releases as soon as we feel the Rust version is usable.
2025-04-24 13:31:40 -07:00
|
|
|
|
2025-04-27 21:47:50 -07:00
|
|
|
// Destructure ConfigOverrides fully to ensure all overrides are applied.
|
|
|
|
|
let ConfigOverrides {
|
|
|
|
|
model,
|
Review Mode (Core) (#3401)
## 📝 Review Mode -- Core
This PR introduces the Core implementation for Review mode:
- New op `Op::Review { prompt: String }:` spawns a child review task
with isolated context, a review‑specific system prompt, and a
`Config.review_model`.
- `EnteredReviewMode`: emitted when the child review session starts.
Every event from this point onwards reflects the review session.
- `ExitedReviewMode(Option<ReviewOutputEvent>)`: emitted when the review
finishes or is interrupted, with optional structured findings:
```json
{
"findings": [
{
"title": "<≤ 80 chars, imperative>",
"body": "<valid Markdown explaining *why* this is a problem; cite files/lines/functions>",
"confidence_score": <float 0.0-1.0>,
"priority": <int 0-3>,
"code_location": {
"absolute_file_path": "<file path>",
"line_range": {"start": <int>, "end": <int>}
}
}
],
"overall_correctness": "patch is correct" | "patch is incorrect",
"overall_explanation": "<1-3 sentence explanation justifying the overall_correctness verdict>",
"overall_confidence_score": <float 0.0-1.0>
}
```
## Questions
### Why separate out its own message history?
We want the review thread to match the training of our review models as
much as possible -- that means using a custom prompt, removing user
instructions, and starting a clean chat history.
We also want to make sure the review thread doesn't leak into the parent
thread.
### Why do this as a mode, vs. sub-agents?
1. We want review to be a synchronous task, so it's fine for now to do a
bespoke implementation.
2. We're still unclear about the final structure for sub-agents. We'd
prefer to land this quickly and then refactor into sub-agents without
rushing that implementation.
2025-09-12 16:25:10 -07:00
|
|
|
review_model: override_review_model,
|
2025-05-04 10:57:12 -07:00
|
|
|
cwd,
|
2025-10-16 11:23:38 -07:00
|
|
|
approval_policy: approval_policy_override,
|
feat: add support for --sandbox flag (#1476)
On a high-level, we try to design `config.toml` so that you don't have
to "comment out a lot of stuff" when testing different options.
Previously, defining a sandbox policy was somewhat at odds with this
principle because you would define the policy as attributes of
`[sandbox]` like so:
```toml
[sandbox]
mode = "workspace-write"
writable_roots = [ "/tmp" ]
```
but if you wanted to temporarily change to a read-only sandbox, you
might feel compelled to modify your file to be:
```toml
[sandbox]
mode = "read-only"
# mode = "workspace-write"
# writable_roots = [ "/tmp" ]
```
Technically, commenting out `writable_roots` would not be strictly
necessary, as `mode = "read-only"` would ignore `writable_roots`, but
it's still a reasonable thing to do to keep things tidy.
Currently, the various values for `mode` do not support that many
attributes, so this is not that hard to maintain, but one could imagine
this becoming more complex in the future.
In this PR, we change Codex CLI so that it no longer recognizes
`[sandbox]`. Instead, it introduces a top-level option, `sandbox_mode`,
and `[sandbox_workspace_write]` is used to further configure the sandbox
when `sandbox_mode = "workspace-write"` is used:
```toml
sandbox_mode = "workspace-write"
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
```
This feels a bit more future-proof in that it is less tedious to
configure different sandboxes:
```toml
sandbox_mode = "workspace-write"
[sandbox_read_only]
# read-only options here...
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
[sandbox_danger_full_access]
# danger-full-access options here...
```
In this scheme, you never need to comment out the configuration for an
individual sandbox type: you only need to redefine `sandbox_mode`.
Relatedly, previous to this change, a user had to do `-c
sandbox.mode=read-only` to change the mode on the command line. With
this change, things are arguably a bit cleaner because the equivalent
option is `-c sandbox_mode=read-only` (and now `-c
sandbox_workspace_write=...` can be set separately).
Though more importantly, we introduce the `-s/--sandbox` option to the
CLI, which maps directly to `sandbox_mode` in `config.toml`, making
config override behavior easier to reason about. Moreover, as you can
see in the updates to the various Markdown files, it is much easier to
explain how to configure sandboxing when things like `--sandbox
read-only` can be used as an example.
Relatedly, this cleanup also made it straightforward to add support for
a `sandbox` option for Codex when used as an MCP server (see the changes
to `mcp-server/src/codex_tool_config.rs`).
Fixes https://github.com/openai/codex/issues/1248.
2025-07-07 22:31:30 -07:00
|
|
|
sandbox_mode,
|
2025-05-13 16:52:52 -07:00
|
|
|
model_provider,
|
|
|
|
|
config_profile: config_profile_key,
|
2025-05-22 21:52:28 -07:00
|
|
|
codex_linux_sandbox_exe,
|
2025-07-22 09:42:22 -07:00
|
|
|
base_instructions,
|
2025-10-14 18:50:00 +01:00
|
|
|
include_apply_patch_tool: include_apply_patch_tool_override,
|
|
|
|
|
include_view_image_tool: include_view_image_tool_override,
|
2025-08-05 14:42:49 -07:00
|
|
|
show_raw_agent_reasoning,
|
2025-08-23 22:58:56 -07:00
|
|
|
tools_web_search_request: override_tools_web_search_request,
|
2025-10-24 17:23:44 -05:00
|
|
|
experimental_sandbox_command_assessment: sandbox_command_assessment_override,
|
2025-10-18 22:13:53 -07:00
|
|
|
additional_writable_roots,
|
2025-04-27 21:47:50 -07:00
|
|
|
} = overrides;
|
|
|
|
|
|
2025-09-10 13:53:46 -07:00
|
|
|
let active_profile_name = config_profile_key
|
|
|
|
|
.as_ref()
|
|
|
|
|
.or(cfg.profile.as_ref())
|
|
|
|
|
.cloned();
|
|
|
|
|
let config_profile = match active_profile_name.as_ref() {
|
2025-05-13 16:52:52 -07:00
|
|
|
Some(key) => cfg
|
|
|
|
|
.profiles
|
feat: add support for --sandbox flag (#1476)
On a high-level, we try to design `config.toml` so that you don't have
to "comment out a lot of stuff" when testing different options.
Previously, defining a sandbox policy was somewhat at odds with this
principle because you would define the policy as attributes of
`[sandbox]` like so:
```toml
[sandbox]
mode = "workspace-write"
writable_roots = [ "/tmp" ]
```
but if you wanted to temporarily change to a read-only sandbox, you
might feel compelled to modify your file to be:
```toml
[sandbox]
mode = "read-only"
# mode = "workspace-write"
# writable_roots = [ "/tmp" ]
```
Technically, commenting out `writable_roots` would not be strictly
necessary, as `mode = "read-only"` would ignore `writable_roots`, but
it's still a reasonable thing to do to keep things tidy.
Currently, the various values for `mode` do not support that many
attributes, so this is not that hard to maintain, but one could imagine
this becoming more complex in the future.
In this PR, we change Codex CLI so that it no longer recognizes
`[sandbox]`. Instead, it introduces a top-level option, `sandbox_mode`,
and `[sandbox_workspace_write]` is used to further configure the sandbox
when when `sandbox_mode = "workspace-write"` is used:
```toml
sandbox_mode = "workspace-write"
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
```
This feels a bit more future-proof in that it is less tedious to
configure different sandboxes:
```toml
sandbox_mode = "workspace-write"
[sandbox_read_only]
# read-only options here...
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
[sandbox_danger_full_access]
# danger-full-access options here...
```
In this scheme, you never need to comment out the configuration for an
individual sandbox type: you only need to redefine `sandbox_mode`.
Relatedly, previous to this change, a user had to do `-c
sandbox.mode=read-only` to change the mode on the command line. With
this change, things are arguably a bit cleaner because the equivalent
option is `-c sandbox_mode=read-only` (and now `-c
sandbox_workspace_write=...` can be set separately).
Though more importantly, we introduce the `-s/--sandbox` option to the
CLI, which maps directly to `sandbox_mode` in `config.toml`, making
config override behavior easier to reason about. Moreover, as you can
see in the updates to the various Markdown files, it is much easier to
explain how to configure sandboxing when things like `--sandbox
read-only` can be used as an example.
Relatedly, this cleanup also made it straightforward to add support for
a `sandbox` option for Codex when used as an MCP server (see the changes
to `mcp-server/src/codex_tool_config.rs`).
Fixes https://github.com/openai/codex/issues/1248.
2025-07-07 22:31:30 -07:00
|
|
|
.get(key)
|
2025-05-13 16:52:52 -07:00
|
|
|
.ok_or_else(|| {
|
|
|
|
|
std::io::Error::new(
|
|
|
|
|
std::io::ErrorKind::NotFound,
|
|
|
|
|
format!("config profile `{key}` not found"),
|
|
|
|
|
)
|
|
|
|
|
})?
|
|
|
|
|
.clone(),
|
|
|
|
|
None => ConfigProfile::default(),
|
|
|
|
|
};
|
|
|
|
|
|
2025-10-14 18:50:00 +01:00
|
|
|
let feature_overrides = FeatureOverrides {
|
|
|
|
|
include_apply_patch_tool: include_apply_patch_tool_override,
|
|
|
|
|
include_view_image_tool: include_view_image_tool_override,
|
|
|
|
|
web_search_request: override_tools_web_search_request,
|
2025-10-24 17:23:44 -05:00
|
|
|
experimental_sandbox_command_assessment: sandbox_command_assessment_override,
|
2025-10-14 18:50:00 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
let features = Features::from_config(&cfg, &config_profile, feature_overrides);
|
|
|
|
|
|
2025-10-16 11:23:38 -07:00
|
|
|
let resolved_cwd = {
|
|
|
|
|
use std::env;
|
|
|
|
|
|
|
|
|
|
match cwd {
|
|
|
|
|
None => {
|
|
|
|
|
tracing::info!("cwd not set, using current dir");
|
|
|
|
|
env::current_dir()?
|
|
|
|
|
}
|
|
|
|
|
Some(p) if p.is_absolute() => p,
|
|
|
|
|
Some(p) => {
|
|
|
|
|
// Resolve relative path against the current working directory.
|
|
|
|
|
tracing::info!("cwd is relative, resolving against current dir");
|
|
|
|
|
let mut current = env::current_dir()?;
|
|
|
|
|
current.push(p);
|
|
|
|
|
current
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
};
|
2025-10-18 22:13:53 -07:00
|
|
|
let additional_writable_roots: Vec<PathBuf> = additional_writable_roots
|
|
|
|
|
.into_iter()
|
|
|
|
|
.map(|path| {
|
|
|
|
|
let absolute = if path.is_absolute() {
|
|
|
|
|
path
|
|
|
|
|
} else {
|
|
|
|
|
resolved_cwd.join(path)
|
|
|
|
|
};
|
|
|
|
|
match canonicalize(&absolute) {
|
|
|
|
|
Ok(canonical) => canonical,
|
|
|
|
|
Err(_) => absolute,
|
|
|
|
|
}
|
|
|
|
|
})
|
|
|
|
|
.collect();
|
2025-10-16 11:23:38 -07:00
|
|
|
let active_project = cfg
|
|
|
|
|
.get_active_project(&resolved_cwd)
|
|
|
|
|
.unwrap_or(ProjectConfig { trust_level: None });
|
|
|
|
|
|
2025-10-27 18:19:32 -07:00
|
|
|
let SandboxPolicyResolution {
|
|
|
|
|
policy: mut sandbox_policy,
|
|
|
|
|
forced_auto_mode_downgraded_on_windows,
|
|
|
|
|
} = cfg.derive_sandbox_policy(sandbox_mode, config_profile.sandbox_mode, &resolved_cwd);
|
2025-10-18 22:13:53 -07:00
|
|
|
if let SandboxPolicy::WorkspaceWrite { writable_roots, .. } = &mut sandbox_policy {
|
|
|
|
|
for path in additional_writable_roots {
|
|
|
|
|
if !writable_roots.iter().any(|existing| existing == &path) {
|
|
|
|
|
writable_roots.push(path);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2025-10-22 16:55:06 +01:00
|
|
|
let approval_policy = approval_policy_override
|
2025-10-16 11:23:38 -07:00
|
|
|
.or(config_profile.approval_policy)
|
|
|
|
|
.or(cfg.approval_policy)
|
|
|
|
|
.unwrap_or_else(|| {
|
|
|
|
|
if active_project.is_trusted() {
|
|
|
|
|
// If no explicit approval policy is set, but we trust cwd, default to OnRequest
|
|
|
|
|
AskForApproval::OnRequest
|
|
|
|
|
} else {
|
|
|
|
|
AskForApproval::default()
|
|
|
|
|
}
|
|
|
|
|
});
|
|
|
|
|
let did_user_set_custom_approval_policy_or_sandbox_mode = approval_policy_override
|
|
|
|
|
.is_some()
|
|
|
|
|
|| config_profile.approval_policy.is_some()
|
|
|
|
|
|| cfg.approval_policy.is_some()
|
|
|
|
|
|| sandbox_mode.is_some()
|
2025-10-25 18:52:26 -05:00
|
|
|
|| config_profile.sandbox_mode.is_some()
|
2025-10-16 11:23:38 -07:00
|
|
|
|| cfg.sandbox_mode.is_some();
|
2025-04-27 21:47:50 -07:00
|
|
|
|
2025-05-07 17:38:28 -07:00
|
|
|
let mut model_providers = built_in_model_providers();
|
|
|
|
|
// Merge user-defined providers into the built-in list.
|
|
|
|
|
for (key, provider) in cfg.model_providers.into_iter() {
|
|
|
|
|
model_providers.entry(key).or_insert(provider);
|
|
|
|
|
}
|
|
|
|
|
|
2025-05-13 16:52:52 -07:00
|
|
|
let model_provider_id = model_provider
|
|
|
|
|
.or(config_profile.model_provider)
|
2025-05-07 17:38:28 -07:00
|
|
|
.or(cfg.model_provider)
|
|
|
|
|
.unwrap_or_else(|| "openai".to_string());
|
|
|
|
|
let model_provider = model_providers
|
2025-05-08 21:46:06 -07:00
|
|
|
.get(&model_provider_id)
|
2025-05-07 17:38:28 -07:00
|
|
|
.ok_or_else(|| {
|
|
|
|
|
std::io::Error::new(
|
|
|
|
|
std::io::ErrorKind::NotFound,
|
2025-05-08 21:46:06 -07:00
|
|
|
format!("Model provider `{model_provider_id}` not found"),
|
2025-05-07 17:38:28 -07:00
|
|
|
)
|
|
|
|
|
})?
|
|
|
|
|
.clone();
|
|
|
|
|
|
2025-08-28 19:24:38 -07:00
|
|
|
let shell_environment_policy = cfg.shell_environment_policy.into();
|
feat: introduce support for shell_environment_policy in config.toml (#1061)
To date, when handling `shell` and `local_shell` tool calls, we were
spawning new processes using the environment inherited from the Codex
process itself. This means that the sensitive `OPENAI_API_KEY` that
Codex needs to talk to OpenAI models was made available to everything
run by `shell` and `local_shell`. While there are cases where that might
be useful, it does not seem like a good default.
This PR introduces a complex `shell_environment_policy` config option to
control the `env` used with these tool calls. It is inevitably a bit
complex so that it is possible to override individual components of the
policy without having to restate the entire thing.
Details are in the updated `README.md` in this PR, but here is the
relevant bit that explains the individual fields of
`shell_environment_policy`:
| Field | Type | Default | Description |
| ------------------------- | -------------------------- | ------- |
-----------------------------------------------------------------------------------------------------------------------------------------------
|
| `inherit` | string | `core` | Starting template for the
environment:<br>`core` (`HOME`, `PATH`, `USER`, …), `all` (clone full
parent env), or `none` (start empty). |
| `ignore_default_excludes` | boolean | `false` | When `false`, Codex
removes any var whose **name** contains `KEY`, `SECRET`, or `TOKEN`
(case-insensitive) before other rules run. |
| `exclude` | array<string> | `[]` | Case-insensitive glob
patterns to drop after the default filter.<br>Examples: `"AWS_*"`,
`"AZURE_*"`. |
| `set` | table<string,string> | `{}` | Explicit key/value
overrides or additions – always win over inherited values. |
| `include_only` | array<string> | `[]` | If non-empty, a
whitelist of patterns; only variables that match _one_ pattern survive
the final step. (Generally used with `inherit = "all"`.) |
In particular, note that the default is `inherit = "core"`, so:
* if you have extra env variables that you want to inherit from the
parent process, use `inherit = "all"` and then specify `include_only`
* if you have extra env variables where you want to hardcode the values,
the default `inherit = "core"` will work fine, but then you need to
specify `set`
This configuration is not battle-tested, so we will probably still have
to play with it a bit. `core/src/exec_env.rs` has the critical business
logic as well as unit tests.
Though if nothing else, previous to this change:
```
$ cargo run --bin codex -- debug seatbelt -- printenv OPENAI_API_KEY
# ...prints OPENAI_API_KEY...
```
But after this change it does not print anything (as desired).
One final thing to call out about this PR is that the
`configure_command!` macro we use in `core/src/exec.rs` has to do some
complex logic with respect to how it builds up the `env` for the process
being spawned under Landlock/seccomp. Specifically, doing
`cmd.env_clear()` followed by `cmd.envs(&$env_map)` (which is arguably
the most intuitive way to do it) caused the Landlock unit tests to fail
because the processes spawned by the unit tests started failing in
unexpected ways! If we forgo `env_clear()` in favor of updating env vars
one at a time, the tests still pass. The comment in the code talks about
this a bit, and while I would like to investigate this more, I need to
move on for the moment, but I do plan to come back to it to fully
understand what is going on. For example, this suggests that we might
not be able to spawn a C program that calls `env_clear()`, which would
be...weird. We may still have to fiddle with our Landlock config if that
is the case.
2025-05-22 09:51:19 -07:00
|
|
|
|
2025-08-28 19:24:38 -07:00
|
|
|
let history = cfg.history.unwrap_or_default();
|
2025-08-23 22:58:56 -07:00
|
|
|
|
2025-10-14 18:50:00 +01:00
|
|
|
let include_apply_patch_tool_flag = features.enabled(Feature::ApplyPatchFreeform);
|
|
|
|
|
let include_view_image_tool_flag = features.enabled(Feature::ViewImageTool);
|
|
|
|
|
let tools_web_search_request = features.enabled(Feature::WebSearchRequest);
|
|
|
|
|
let use_experimental_streamable_shell_tool = features.enabled(Feature::StreamableShell);
|
|
|
|
|
let use_experimental_unified_exec_tool = features.enabled(Feature::UnifiedExec);
|
|
|
|
|
let use_experimental_use_rmcp_client = features.enabled(Feature::RmcpClient);
|
2025-10-24 17:23:44 -05:00
|
|
|
let experimental_sandbox_command_assessment =
|
|
|
|
|
features.enabled(Feature::SandboxCommandAssessment);
|
2025-08-27 17:41:23 -07:00
|
|
|
|
2025-10-20 08:50:54 -07:00
|
|
|
let forced_chatgpt_workspace_id =
|
|
|
|
|
cfg.forced_chatgpt_workspace_id.as_ref().and_then(|value| {
|
|
|
|
|
let trimmed = value.trim();
|
|
|
|
|
if trimmed.is_empty() {
|
|
|
|
|
None
|
|
|
|
|
} else {
|
|
|
|
|
Some(trimmed.to_string())
|
|
|
|
|
}
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
let forced_login_method = cfg.forced_login_method;
|
|
|
|
|
|
feat: show number of tokens remaining in UI (#1388)
When using the OpenAI Responses API, we now record the `usage` field for
a `"response.completed"` event, which includes metrics about the number
of tokens consumed. We also introduce `openai_model_info.rs`, which
includes current data about the most common OpenAI models available via
the API (specifically `context_window` and `max_output_tokens`). If
Codex does not recognize the model, you can set `model_context_window`
and `model_max_output_tokens` explicitly in `config.toml`.
We then introduce a new event type to `protocol.rs`, `TokenCount`,
which includes the `TokenUsage` for the most recent turn.
Finally, we update the TUI to record the running sum of tokens used so
the percentage of available context window remaining can be reported via
the placeholder text for the composer:

We could certainly get much fancier with this (such as reporting the
estimated cost of the conversation), but for now, we are just trying to
achieve feature parity with the TypeScript CLI.
Though arguably this improves upon the TypeScript CLI, as the TypeScript
CLI uses heuristics to estimate the number of tokens used rather than
using the `usage` information directly:
https://github.com/openai/codex/blob/296996d74e345b1b05d8c3451a06ace21c5ada96/codex-cli/src/utils/approximate-tokens-used.ts#L3-L16
Fixes https://github.com/openai/codex/issues/1242
2025-06-25 23:31:11 -07:00
|
|
|
let model = model
|
|
|
|
|
.or(config_profile.model)
|
|
|
|
|
.or(cfg.model)
|
|
|
|
|
.unwrap_or_else(default_model);
|
2025-09-05 16:56:58 -07:00
|
|
|
|
2025-09-14 15:45:15 -07:00
|
|
|
let mut model_family =
|
|
|
|
|
find_family_for_model(&model).unwrap_or_else(|| derive_default_model_family(&model));
|
2025-08-04 23:50:03 -07:00
|
|
|
|
2025-09-05 16:56:58 -07:00
|
|
|
if let Some(supports_reasoning_summaries) = cfg.model_supports_reasoning_summaries {
|
|
|
|
|
model_family.supports_reasoning_summaries = supports_reasoning_summaries;
|
|
|
|
|
}
|
|
|
|
|
if let Some(model_reasoning_summary_format) = cfg.model_reasoning_summary_format {
|
|
|
|
|
model_family.reasoning_summary_format = model_reasoning_summary_format;
|
|
|
|
|
}
|
|
|
|
|
|
2025-08-04 23:50:03 -07:00
|
|
|
let openai_model_info = get_model_info(&model_family);
|
feat: show number of tokens remaining in UI (#1388)
When using the OpenAI Responses API, we now record the `usage` field for
a `"response.completed"` event, which includes metrics about the number
of tokens consumed. We also introduce `openai_model_info.rs`, which
includes current data about the most common OpenAI models available via
the API (specifically `context_window` and `max_output_tokens`). If
Codex does not recognize the model, you can set `model_context_window`
and `model_max_output_tokens` explicitly in `config.toml`.
We then introduce a new event type to `protocol.rs`, `TokenCount`,
which includes the `TokenUsage` for the most recent turn.
Finally, we update the TUI to record the running sum of tokens used so
the percentage of available context window remaining can be reported via
the placeholder text for the composer:

We could certainly get much fancier with this (such as reporting the
estimated cost of the conversation), but for now, we are just trying to
achieve feature parity with the TypeScript CLI.
Though arguably this improves upon the TypeScript CLI, as the TypeScript
CLI uses heuristics to estimate the number of tokens used rather than
using the `usage` information directly:
https://github.com/openai/codex/blob/296996d74e345b1b05d8c3451a06ace21c5ada96/codex-cli/src/utils/approximate-tokens-used.ts#L3-L16
Fixes https://github.com/openai/codex/issues/1242
2025-06-25 23:31:11 -07:00
|
|
|
let model_context_window = cfg
|
|
|
|
|
.model_context_window
|
|
|
|
|
.or_else(|| openai_model_info.as_ref().map(|info| info.context_window));
|
|
|
|
|
let model_max_output_tokens = cfg.model_max_output_tokens.or_else(|| {
|
|
|
|
|
openai_model_info
|
|
|
|
|
.as_ref()
|
|
|
|
|
.map(|info| info.max_output_tokens)
|
|
|
|
|
});
|
2025-09-12 13:07:10 -07:00
|
|
|
let model_auto_compact_token_limit = cfg.model_auto_compact_token_limit.or_else(|| {
|
|
|
|
|
openai_model_info
|
|
|
|
|
.as_ref()
|
|
|
|
|
.and_then(|info| info.auto_compact_token_limit)
|
|
|
|
|
});
|
2025-07-18 17:04:04 -07:00
|
|
|
|
2025-07-29 10:06:05 -07:00
|
|
|
// Load base instructions override from a file if specified. If the
|
|
|
|
|
// path is relative, resolve it against the effective cwd so the
|
|
|
|
|
// behaviour matches other path-like config values.
|
2025-08-04 09:34:46 -07:00
|
|
|
let experimental_instructions_path = config_profile
|
|
|
|
|
.experimental_instructions_file
|
|
|
|
|
.as_ref()
|
|
|
|
|
.or(cfg.experimental_instructions_file.as_ref());
|
|
|
|
|
let file_base_instructions =
|
|
|
|
|
Self::get_base_instructions(experimental_instructions_path, &resolved_cwd)?;
|
2025-07-29 10:06:05 -07:00
|
|
|
let base_instructions = base_instructions.or(file_base_instructions);
|
2025-07-22 09:42:22 -07:00
|
|
|
|
Review Mode (Core) (#3401)
## 📝 Review Mode -- Core
This PR introduces the Core implementation for Review mode:
- New op `Op::Review { prompt: String }:` spawns a child review task
with isolated context, a review‑specific system prompt, and a
`Config.review_model`.
- `EnteredReviewMode`: emitted when the child review session starts.
Every event from this point onwards reflects the review session.
- `ExitedReviewMode(Option<ReviewOutputEvent>)`: emitted when the review
finishes or is interrupted, with optional structured findings:
```json
{
"findings": [
{
"title": "<≤ 80 chars, imperative>",
"body": "<valid Markdown explaining *why* this is a problem; cite files/lines/functions>",
"confidence_score": <float 0.0-1.0>,
"priority": <int 0-3>,
"code_location": {
"absolute_file_path": "<file path>",
"line_range": {"start": <int>, "end": <int>}
}
}
],
"overall_correctness": "patch is correct" | "patch is incorrect",
"overall_explanation": "<1-3 sentence explanation justifying the overall_correctness verdict>",
"overall_confidence_score": <float 0.0-1.0>
}
```
## Questions
### Why separate out its own message history?
We want the review thread to match the training of our review models as
much as possible -- that means using a custom prompt, removing user
instructions, and starting a clean chat history.
We also want to make sure the review thread doesn't leak into the parent
thread.
### Why do this as a mode, vs. sub-agents?
1. We want review to be a synchronous task, so it's fine for now to do a
bespoke implementation.
2. We're still unclear about the final structure for sub-agents. We'd
prefer to land this quickly and then refactor into sub-agents without
rushing that implementation.
2025-09-12 16:25:10 -07:00
|
|
|
// Default review model when not set in config; allow CLI override to take precedence.
|
|
|
|
|
let review_model = override_review_model
|
|
|
|
|
.or(cfg.review_model)
|
|
|
|
|
.unwrap_or_else(default_review_model);
|
|
|
|
|
|
2025-05-07 17:38:28 -07:00
|
|
|
let config = Self {
|
feat: show number of tokens remaining in UI (#1388)
When using the OpenAI Responses API, we now record the `usage` field for
a `"response.completed"` event, which includes metrics about the number
of tokens consumed. We also introduce `openai_model_info.rs`, which
includes current data about the most common OpenAI models available via
the API (specifically `context_window` and `max_output_tokens`). If
Codex does not recognize the model, you can set `model_context_window`
and `model_max_output_tokens` explicitly in `config.toml`.
We then introduce a new event type to `protocol.rs`, `TokenCount`,
which includes the `TokenUsage` for the most recent turn.
Finally, we update the TUI to record the running sum of tokens used so
the percentage of available context window remaining can be reported via
the placeholder text for the composer:

We could certainly get much fancier with this (such as reporting the
estimated cost of the conversation), but for now, we are just trying to
achieve feature parity with the TypeScript CLI.
Though arguably this improves upon the TypeScript CLI, as the TypeScript
CLI uses heuristics to estimate the number of tokens used rather than
using the `usage` information directly:
https://github.com/openai/codex/blob/296996d74e345b1b05d8c3451a06ace21c5ada96/codex-cli/src/utils/approximate-tokens-used.ts#L3-L16
Fixes https://github.com/openai/codex/issues/1242
2025-06-25 23:31:11 -07:00
|
|
|
model,
|
Review Mode (Core) (#3401)
## 📝 Review Mode -- Core
This PR introduces the Core implementation for Review mode:
- New op `Op::Review { prompt: String }:` spawns a child review task
with isolated context, a review‑specific system prompt, and a
`Config.review_model`.
- `EnteredReviewMode`: emitted when the child review session starts.
Every event from this point onwards reflects the review session.
- `ExitedReviewMode(Option<ReviewOutputEvent>)`: emitted when the review
finishes or is interrupted, with optional structured findings:
```json
{
"findings": [
{
"title": "<≤ 80 chars, imperative>",
"body": "<valid Markdown explaining *why* this is a problem; cite files/lines/functions>",
"confidence_score": <float 0.0-1.0>,
"priority": <int 0-3>,
"code_location": {
"absolute_file_path": "<file path>",
"line_range": {"start": <int>, "end": <int>}
}
}
],
"overall_correctness": "patch is correct" | "patch is incorrect",
"overall_explanation": "<1-3 sentence explanation justifying the overall_correctness verdict>",
"overall_confidence_score": <float 0.0-1.0>
}
```
## Questions
### Why separate out its own message history?
We want the review thread to match the training of our review models as
much as possible -- that means using a custom prompt, removing user
instructions, and starting a clean chat history.
We also want to make sure the review thread doesn't leak into the parent
thread.
### Why do this as a mode, vs. sub-agents?
1. We want review to be a synchronous task, so it's fine for now to do a
bespoke implementation.
2. We're still unclear about the final structure for sub-agents. We'd
prefer to land this quickly and then refactor into sub-agents without
rushing that implementation.
2025-09-12 16:25:10 -07:00
|
|
|
review_model,
|
2025-08-04 23:50:03 -07:00
|
|
|
model_family,
|
feat: show number of tokens remaining in UI (#1388)
When using the OpenAI Responses API, we now record the `usage` field for
a `"response.completed"` event, which includes metrics about the number
of tokens consumed. We also introduce `openai_model_info.rs`, which
includes current data about the most common OpenAI models available via
the API (specifically `context_window` and `max_output_tokens`). If
Codex does not recognize the model, you can set `model_context_window`
and `model_max_output_tokens` explicitly in `config.toml`.
When then introduce a new event type to `protocol.rs`, `TokenCount`,
which includes the `TokenUsage` for the most recent turn.
Finally, we update the TUI to record the running sum of tokens used so
the percentage of available context window remaining can be reported via
the placeholder text for the composer:

We could certainly get much fancier with this (such as reporting the
estimated cost of the conversation), but for now, we are just trying to
achieve feature parity with the TypeScript CLI.
Though arguably this improves upon the TypeScript CLI, as the TypeScript
CLI uses heuristics to estimate the number of tokens used rather than
using the `usage` information directly:
https://github.com/openai/codex/blob/296996d74e345b1b05d8c3451a06ace21c5ada96/codex-cli/src/utils/approximate-tokens-used.ts#L3-L16
Fixes https://github.com/openai/codex/issues/1242
2025-06-25 23:31:11 -07:00
|
|
|
model_context_window,
|
|
|
|
|
model_max_output_tokens,
|
2025-09-12 13:07:10 -07:00
|
|
|
model_auto_compact_token_limit,
|
2025-05-08 21:46:06 -07:00
|
|
|
model_provider_id,
|
2025-05-07 17:38:28 -07:00
|
|
|
model_provider,
|
2025-05-12 08:45:46 -07:00
|
|
|
cwd: resolved_cwd,
|
2025-10-15 19:03:54 +01:00
|
|
|
approval_policy,
|
fix: overhaul SandboxPolicy and config loading in Rust (#732)
Previous to this PR, `SandboxPolicy` was a bit difficult to work with:
https://github.com/openai/codex/blob/237f8a11e11fdcc793a09e787e48215676d9b95b/codex-rs/core/src/protocol.rs#L98-L108
Specifically:
* It was an `enum` and therefore options were mutually exclusive as
opposed to additive.
* It defined things in terms of what the agent _could not_ do as opposed
to what they _could_ do. This made things hard to support because we
would prefer to build up a sandbox config by starting with something
extremely restrictive and only granting permissions for things the user
as explicitly allowed.
This PR changes things substantially by redefining the policy in terms
of two concepts:
* A `SandboxPermission` enum that defines permissions that can be
granted to the agent/sandbox.
* A `SandboxPolicy` that internally stores a `Vec<SandboxPermission>`,
but externally exposes a simpler API that can be used to configure
Seatbelt/Landlock.
Previous to this PR, we supported a `--sandbox` flag that effectively
mapped to an enum value in `SandboxPolicy`. Though now that
`SandboxPolicy` is a wrapper around `Vec<SandboxPermission>`, the single
`--sandbox` flag no longer makes sense. While I could have turned it
into a flag that the user can specify multiple times, I think the
current values to use with such a flag are long and potentially messy,
so for the moment, I have dropped support for `--sandbox` altogether and
we can bring it back once we have figured out the naming thing.
Since `--sandbox` is gone, users now have to specify `--full-auto` to
get a sandbox that allows writes in `cwd`. Admittedly, there is no clean
way to specify the equivalent of `--full-auto` in your `config.toml`
right now, so we will have to revisit that, as well.
Because `Config` presents a `SandboxPolicy` field and `SandboxPolicy`
changed considerably, I had to overhaul how config loading works, as
well. There are now two distinct concepts, `ConfigToml` and `Config`:
* `ConfigToml` is the deserialization of `~/.codex/config.toml`. As one
might expect, every field is `Optional` and it is `#[derive(Deserialize,
Default)]`. Consistent use of `Optional` makes it clear what the user
has specified explicitly.
* `Config` is the "normalized config" and is produced by merging
`ConfigToml` with `ConfigOverrides`. Where `ConfigToml` contains a raw
`Option<Vec<SandboxPermission>>`, `Config` presents only the final
`SandboxPolicy`.
The changes to `core/src/exec.rs` and `core/src/linux.rs` merit extra
special attention to ensure we are faithfully mapping the
`SandboxPolicy` to the Seatbelt and Landlock configs, respectively.
Also, take note that `core/src/seatbelt_readonly_policy.sbpl` has been
renamed to `codex-rs/core/src/seatbelt_base_policy.sbpl` and that
`(allow file-read*)` has been removed from the `.sbpl` file as now this
is added to the policy in `core/src/exec.rs` when
`sandbox_policy.has_full_disk_read_access()` is `true`.
2025-04-29 15:01:16 -07:00
|
|
|
sandbox_policy,
|
2025-10-16 11:23:38 -07:00
|
|
|
did_user_set_custom_approval_policy_or_sandbox_mode,
|
2025-10-27 18:19:32 -07:00
|
|
|
forced_auto_mode_downgraded_on_windows,
|
feat: introduce support for shell_environment_policy in config.toml (#1061)
To date, when handling `shell` and `local_shell` tool calls, we were
spawning new processes using the environment inherited from the Codex
process itself. This means that the sensitive `OPENAI_API_KEY` that
Codex needs to talk to OpenAI models was made available to everything
run by `shell` and `local_shell`. While there are cases where that might
be useful, it does not seem like a good default.
This PR introduces a complex `shell_environment_policy` config option to
control the `env` used with these tool calls. It is inevitably a bit
complex so that it is possible to override individual components of the
policy without having to restate the entire thing.
Details are in the updated `README.md` in this PR, but here is the
relevant bit that explains the individual fields of
`shell_environment_policy`:
| Field | Type | Default | Description |
| ------------------------- | -------------------------- | ------- |
-----------------------------------------------------------------------------------------------------------------------------------------------
|
| `inherit` | string | `core` | Starting template for the
environment:<br>`core` (`HOME`, `PATH`, `USER`, …), `all` (clone full
parent env), or `none` (start empty). |
| `ignore_default_excludes` | boolean | `false` | When `false`, Codex
removes any var whose **name** contains `KEY`, `SECRET`, or `TOKEN`
(case-insensitive) before other rules run. |
| `exclude` | array<string> | `[]` | Case-insensitive glob
patterns to drop after the default filter.<br>Examples: `"AWS_*"`,
`"AZURE_*"`. |
| `set` | table<string,string> | `{}` | Explicit key/value
overrides or additions – always win over inherited values. |
| `include_only` | array<string> | `[]` | If non-empty, a
whitelist of patterns; only variables that match _one_ pattern survive
the final step. (Generally used with `inherit = "all"`.) |
In particular, note that the default is `inherit = "core"`, so:
* if you have extra env variables that you want to inherit from the
parent process, use `inherit = "all"` and then specify `include_only`
* if you have extra env variables where you want to hardcode the values,
the default `inherit = "core"` will work fine, but then you need to
specify `set`
This configuration is not battle-tested, so we will probably still have
to play with it a bit. `core/src/exec_env.rs` has the critical business
logic as well as unit tests.
Though if nothing else, previous to this change:
```
$ cargo run --bin codex -- debug seatbelt -- printenv OPENAI_API_KEY
# ...prints OPENAI_API_KEY...
```
But after this change it does not print anything (as desired).
One final thing to call out about this PR is that the
`configure_command!` macro we use in `core/src/exec.rs` has to do some
complex logic with respect to how it builds up the `env` for the process
being spawned under Landlock/seccomp. Specifically, doing
`cmd.env_clear()` followed by `cmd.envs(&$env_map)` (which is arguably
the most intuitive way to do it) caused the Landlock unit tests to fail
because the processes spawned by the unit tests started failing in
unexpected ways! If we forgo `env_clear()` in favor of updating env vars
one at a time, the tests still pass. The comment in the code talks about
this a bit, and while I would like to investigate this more, I need to
move on for the moment, but I do plan to come back to it to fully
understand what is going on. For example, this suggests that we might
not be able to spawn a C program that calls `env_clear()`, which would
be...weird. We may still have to fiddle with our Landlock config if that
is the case.
2025-05-22 09:51:19 -07:00
|
|
|
shell_environment_policy,
|
feat: configurable notifications in the Rust CLI (#793)
With this change, you can specify a program that will be executed to get
notified about events generated by Codex. The notification info will be
packaged as a JSON object. The supported notification types are defined
by the `UserNotification` enum introduced in this PR. Initially, it
contains only one variant, `AgentTurnComplete`:
```rust
pub(crate) enum UserNotification {
#[serde(rename_all = "kebab-case")]
AgentTurnComplete {
turn_id: String,
/// Messages that the user sent to the agent to initiate the turn.
input_messages: Vec<String>,
/// The last message sent by the assistant in the turn.
last_assistant_message: Option<String>,
},
}
```
This is intended to support the common case when a "turn" ends, which
often means it is now your chance to give Codex further instructions.
For example, I have the following in my `~/.codex/config.toml`:
```toml
notify = ["python3", "/Users/mbolin/.codex/notify.py"]
```
I created my own custom notifier script that calls out to
[terminal-notifier](https://github.com/julienXX/terminal-notifier) to
show a desktop push notification on macOS. Contents of `notify.py`:
```python
#!/usr/bin/env python3
import json
import subprocess
import sys
def main() -> int:
if len(sys.argv) != 2:
print("Usage: notify.py <NOTIFICATION_JSON>")
return 1
try:
notification = json.loads(sys.argv[1])
except json.JSONDecodeError:
return 1
match notification_type := notification.get("type"):
case "agent-turn-complete":
assistant_message = notification.get("last-assistant-message")
if assistant_message:
title = f"Codex: {assistant_message}"
else:
title = "Codex: Turn Complete!"
input_messages = notification.get("input_messages", [])
message = " ".join(input_messages)
title += message
case _:
print(f"not sending a push notification for: {notification_type}")
return 0
subprocess.check_output(
[
"terminal-notifier",
"-title",
title,
"-message",
message,
"-group",
"codex",
"-ignoreDnD",
"-activate",
"com.googlecode.iterm2",
]
)
return 0
if __name__ == "__main__":
sys.exit(main())
```
For reference, here are related PRs that tried to add this functionality
to the TypeScript version of the Codex CLI:
* https://github.com/openai/codex/pull/160
* https://github.com/openai/codex/pull/498
2025-05-02 19:48:13 -07:00
|
|
|
notify: cfg.notify,
|
2025-07-22 09:42:22 -07:00
|
|
|
user_instructions,
|
|
|
|
|
base_instructions,
|
2025-10-27 19:41:49 -07:00
|
|
|
// The config.toml omits "_mode" because it's a config file. However, "_mode"
|
|
|
|
|
// is important in code to differentiate the mode from the store implementation.
|
|
|
|
|
cli_auth_credentials_store_mode: cfg.cli_auth_credentials_store.unwrap_or_default(),
|
feat: support mcp_servers in config.toml (#829)
This adds initial support for MCP servers in the style of Claude Desktop
and Cursor. Note this PR is the bare minimum to get things working end
to end: all configured MCP servers are launched every time Codex is run,
there is no recovery for MCP servers that crash, etc.
(Also, I took some shortcuts to change some fields of `Session` to be
`pub(crate)`, which also means there are circular deps between
`codex.rs` and `mcp_tool_call.rs`, but I will clean that up in a
subsequent PR.)
`codex-rs/README.md` is updated as part of this PR to explain how to use
this feature. There is a bit of plumbing to route the new settings from
`Config` to the business logic in `codex.rs`. The most significant
chunks for new code are in `mcp_connection_manager.rs` (which defines
the `McpConnectionManager` struct) and `mcp_tool_call.rs`, which is
responsible for tool calls.
This PR also introduces new `McpToolCallBegin` and `McpToolCallEnd`
event types to the protocol, but does not add any handlers for them.
(See https://github.com/openai/codex/pull/836 for initial usage.)
To test, I added the following to my `~/.codex/config.toml`:
```toml
# Local build of https://github.com/hideya/mcp-server-weather-js
[mcp_servers.weather]
command = "/Users/mbolin/code/mcp-server-weather-js/dist/index.js"
args = []
```
And then I ran the following:
```
codex-rs$ cargo run --bin codex exec 'what is the weather in san francisco'
[2025-05-06T22:40:05] Task started: 1
[2025-05-06T22:40:18] Agent message: Here’s the latest National Weather Service forecast for San Francisco (downtown, near 37.77° N, 122.42° W):
This Afternoon (Tue):
• Sunny, high near 69 °F
• West-southwest wind around 12 mph
Tonight:
• Partly cloudy, low around 52 °F
• SW wind 7–10 mph
...
```
Note that Codex itself is not able to make network calls, so it would
not normally be able to get live weather information like this. However,
the weather MCP is [currently] not run under the Codex sandbox, so it is
able to hit `api.weather.gov` and fetch current weather information.
---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed
with [ReviewStack](https://reviewstack.dev/openai/codex/pull/829).
* #836
* __->__ #829
2025-05-06 15:47:59 -07:00
|
|
|
mcp_servers: cfg.mcp_servers,
|
2025-10-07 19:39:32 -07:00
|
|
|
// The config.toml omits "_mode" because it's a config file. However, "_mode"
|
|
|
|
|
// is important in code to differentiate the mode from the store implementation.
|
|
|
|
|
mcp_oauth_credentials_store_mode: cfg.mcp_oauth_credentials_store.unwrap_or_default(),
|
2025-05-07 17:38:28 -07:00
|
|
|
model_providers,
|
2025-05-10 17:52:59 -07:00
|
|
|
project_doc_max_bytes: cfg.project_doc_max_bytes.unwrap_or(PROJECT_DOC_MAX_BYTES),
|
2025-10-01 11:19:59 -07:00
|
|
|
project_doc_fallback_filenames: cfg
|
|
|
|
|
.project_doc_fallback_filenames
|
|
|
|
|
.unwrap_or_default()
|
|
|
|
|
.into_iter()
|
|
|
|
|
.filter_map(|name| {
|
|
|
|
|
let trimmed = name.trim();
|
|
|
|
|
if trimmed.is_empty() {
|
|
|
|
|
None
|
|
|
|
|
} else {
|
|
|
|
|
Some(trimmed.to_string())
|
|
|
|
|
}
|
|
|
|
|
})
|
|
|
|
|
.collect(),
|
2025-05-15 00:30:13 -07:00
|
|
|
codex_home,
|
feat: record messages from user in ~/.codex/history.jsonl (#939)
This is a large change to support a "history" feature like you would
expect in a shell like Bash.
History events are recorded in `$CODEX_HOME/history.jsonl`. Because it
is a JSONL file, it is straightforward to append new entries (as opposed
to the TypeScript file that uses `$CODEX_HOME/history.json`, so to be
valid JSON, each new entry entails rewriting the entire file). Because
it is possible for there to be multiple instances of Codex CLI writing
to `history.jsonl` at once, we use advisory file locking when working
with `history.jsonl` in `codex-rs/core/src/message_history.rs`.
Because we believe history is a sufficiently useful feature, we enable
it by default. Though to provide some safety, we set the file
permissions of `history.jsonl` to be `o600` so that other users on the
system cannot read the user's history. We do not yet support a default
list of `SENSITIVE_PATTERNS` as the TypeScript CLI does:
https://github.com/openai/codex/blob/3fdf9df1335ac9501e3fb0e61715359145711e8b/codex-cli/src/utils/storage/command-history.ts#L10-L17
We are going to take a more conservative approach to this list in the
Rust CLI. For example, while `/\b[A-Za-z0-9-_]{20,}\b/` might exclude
sensitive information like API tokens, it would also exclude valuable
information such as references to Git commits.
As noted in the updated documentation, users can opt-out of history by
adding the following to `config.toml`:
```toml
[history]
persistence = "none"
```
Because `history.jsonl` could, in theory, be quite large, we take a[n
arguably overly pedantic] approach in reading history entries into
memory. Specifically, we start by telling the client the current number
of entries in the history file (`history_entry_count`) as well as the
inode (`history_log_id`) of `history.jsonl` (see the new fields on
`SessionConfiguredEvent`).
The client is responsible for keeping new entries in memory to create a
"local history," but if the user hits up enough times to go "past" the
end of local history, then the client should use the new
`GetHistoryEntryRequest` in the protocol to fetch older entries.
Specifically, it should pass the `history_log_id` it was given
originally and work backwards from `history_entry_count`. (It should
really fetch history in batches rather than one-at-a-time, but that is
something we can improve upon in subsequent PRs.)
The motivation behind this crazy scheme is that it is designed to defend
against:
* The `history.jsonl` being truncated during the session such that the
index into the history is no longer consistent with what had been read
up to that point. We do not yet have logic to enforce a `max_bytes` for
`history.jsonl`, but once we do, we will aspire to implement it in a way
that should result in a new inode for the file on most systems.
* New items from concurrent Codex CLI sessions appending to the history.
Because, in absence of truncation, `history.jsonl` is an append-only
log, so long as the client reads backwards from `history_entry_count`,
it should always get a consistent view of history. (That said, it will
not be able to read _new_ commands from concurrent sessions, but perhaps
we will introduce a `/` command to reload latest history or something
down the road.)
Admittedly, my testing of this feature thus far has been fairly light. I
expect we will find bugs and introduce enhancements/fixes going forward.
2025-05-15 16:26:23 -07:00
|
|
|
history,
|
2025-05-16 11:33:08 -07:00
|
|
|
file_opener: cfg.file_opener.unwrap_or(UriBasedFileOpener::VsCode),
|
2025-05-22 21:52:28 -07:00
|
|
|
codex_linux_sandbox_exe,
|
2025-05-30 23:14:56 -07:00
|
|
|
|
2025-08-04 17:03:24 -07:00
|
|
|
hide_agent_reasoning: cfg.hide_agent_reasoning.unwrap_or(false),
|
2025-08-05 11:31:11 -07:00
|
|
|
show_raw_agent_reasoning: cfg
|
|
|
|
|
.show_raw_agent_reasoning
|
2025-08-05 14:42:49 -07:00
|
|
|
.or(show_raw_agent_reasoning)
|
2025-08-05 11:31:11 -07:00
|
|
|
.unwrap_or(false),
|
2025-07-08 22:05:22 +03:00
|
|
|
model_reasoning_effort: config_profile
|
|
|
|
|
.model_reasoning_effort
|
2025-09-12 12:06:33 -07:00
|
|
|
.or(cfg.model_reasoning_effort),
|
2025-07-08 22:05:22 +03:00
|
|
|
model_reasoning_summary: config_profile
|
|
|
|
|
.model_reasoning_summary
|
|
|
|
|
.or(cfg.model_reasoning_summary)
|
|
|
|
|
.unwrap_or_default(),
|
2025-08-22 17:12:10 +01:00
|
|
|
model_verbosity: config_profile.model_verbosity.or(cfg.model_verbosity),
|
2025-07-11 13:30:11 -04:00
|
|
|
chatgpt_base_url: config_profile
|
|
|
|
|
.chatgpt_base_url
|
2025-08-28 19:24:38 -07:00
|
|
|
.or(cfg.chatgpt_base_url)
|
2025-07-11 13:30:11 -04:00
|
|
|
.unwrap_or("https://chatgpt.com/backend-api/".to_string()),
|
2025-10-20 08:50:54 -07:00
|
|
|
forced_chatgpt_workspace_id,
|
|
|
|
|
forced_login_method,
|
2025-10-14 18:50:00 +01:00
|
|
|
include_apply_patch_tool: include_apply_patch_tool_flag,
|
2025-08-23 22:58:56 -07:00
|
|
|
tools_web_search_request,
|
2025-10-24 17:23:44 -05:00
|
|
|
experimental_sandbox_command_assessment,
|
2025-10-14 18:50:00 +01:00
|
|
|
use_experimental_streamable_shell_tool,
|
|
|
|
|
use_experimental_unified_exec_tool,
|
|
|
|
|
use_experimental_use_rmcp_client,
|
|
|
|
|
include_view_image_tool: include_view_image_tool_flag,
|
|
|
|
|
features,
|
2025-09-10 13:53:46 -07:00
|
|
|
active_profile: active_profile_name,
|
2025-10-16 11:23:38 -07:00
|
|
|
active_project,
|
2025-10-04 17:41:40 -07:00
|
|
|
windows_wsl_setup_acknowledged: cfg.windows_wsl_setup_acknowledged.unwrap_or(false),
|
2025-10-16 17:31:46 -07:00
|
|
|
notices: cfg.notice.unwrap_or_default(),
|
2025-08-28 12:54:12 -07:00
|
|
|
disable_paste_burst: cfg.disable_paste_burst.unwrap_or(false),
|
2025-09-15 10:22:02 -07:00
|
|
|
tui_notifications: cfg
|
|
|
|
|
.tui
|
|
|
|
|
.as_ref()
|
|
|
|
|
.map(|t| t.notifications.clone())
|
|
|
|
|
.unwrap_or_default(),
|
OpenTelemetry events (#2103)
### Title
## otel
Codex can emit [OpenTelemetry](https://opentelemetry.io/) **log events**
that
describe each run: outbound API requests, streamed responses, user
input,
tool-approval decisions, and the result of every tool invocation. Export
is
**disabled by default** so local runs remain self-contained. Opt in by
adding an
`[otel]` table and choosing an exporter.
```toml
[otel]
environment = "staging" # defaults to "dev"
exporter = "none" # defaults to "none"; set to otlp-http or otlp-grpc to send events
log_user_prompt = false # defaults to false; redact prompt text unless explicitly enabled
```
Codex tags every exported event with `service.name = "codex-cli"`, the
CLI
version, and an `env` attribute so downstream collectors can distinguish
dev/staging/prod traffic. Only telemetry produced inside the
`codex_otel`
crate—the events listed below—is forwarded to the exporter.
### Event catalog
Every event shares a common set of metadata fields: `event.timestamp`,
`conversation.id`, `app.version`, `auth_mode` (when available),
`user.account_id` (when available), `terminal.type`, `model`, and
`slug`.
With OTEL enabled Codex emits the following event types (in addition to
the
metadata above):
- `codex.api_request`
- `cf_ray` (optional)
- `attempt`
- `duration_ms`
- `http.response.status_code` (optional)
- `error.message` (failures)
- `codex.sse_event`
- `event.kind`
- `duration_ms`
- `error.message` (failures)
- `input_token_count` (completion only)
- `output_token_count` (completion only)
- `cached_token_count` (completion only, optional)
- `reasoning_token_count` (completion only, optional)
- `tool_token_count` (completion only)
- `codex.user_prompt`
- `prompt_length`
- `prompt` (redacted unless `log_user_prompt = true`)
- `codex.tool_decision`
- `tool_name`
- `call_id`
- `decision` (`approved`, `approved_for_session`, `denied`, or `abort`)
- `source` (`config` or `user`)
- `codex.tool_result`
- `tool_name`
- `call_id`
- `arguments`
- `duration_ms` (execution time for the tool)
- `success` (`"true"` or `"false"`)
- `output`
### Choosing an exporter
Set `otel.exporter` to control where events go:
- `none` – leaves instrumentation active but skips exporting. This is
the
default.
- `otlp-http` – posts OTLP log records to an OTLP/HTTP collector.
Specify the
endpoint, protocol, and headers your collector expects:
```toml
[otel]
exporter = { otlp-http = {
endpoint = "https://otel.example.com/v1/logs",
protocol = "binary",
headers = { "x-otlp-api-key" = "${OTLP_TOKEN}" }
}}
```
- `otlp-grpc` – streams OTLP log records over gRPC. Provide the endpoint
and any
metadata headers:
```toml
[otel]
exporter = { otlp-grpc = {
endpoint = "https://otel.example.com:4317",
headers = { "x-otlp-meta" = "abc123" }
}}
```
If the exporter is `none` nothing is written anywhere; otherwise you
must run or point to your
own collector. All exporters run on a background batch worker that is
flushed on
shutdown.
If you build Codex from source the OTEL crate is still behind an `otel`
feature
flag; the official prebuilt binaries ship with the feature enabled. When
the
feature is disabled the telemetry hooks become no-ops so the CLI
continues to
function without the extra dependencies.
---------
Co-authored-by: Anton Panasenko <apanasenko@openai.com>
2025-09-29 19:30:55 +01:00
|
|
|
otel: {
|
|
|
|
|
let t: OtelConfigToml = cfg.otel.unwrap_or_default();
|
|
|
|
|
let log_user_prompt = t.log_user_prompt.unwrap_or(false);
|
|
|
|
|
let environment = t
|
|
|
|
|
.environment
|
|
|
|
|
.unwrap_or(DEFAULT_OTEL_ENVIRONMENT.to_string());
|
|
|
|
|
let exporter = t.exporter.unwrap_or(OtelExporterKind::None);
|
|
|
|
|
OtelConfig {
|
|
|
|
|
log_user_prompt,
|
|
|
|
|
environment,
|
|
|
|
|
exporter,
|
|
|
|
|
}
|
|
|
|
|
},
|
2025-05-07 17:38:28 -07:00
|
|
|
};
|
|
|
|
|
Ok(config)
|
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
As stated in `codex-rs/README.md`:
Today, Codex CLI is written in TypeScript and requires Node.js 22+ to
run it. For a number of users, this runtime requirement inhibits
adoption: they would be better served by a standalone executable. As
maintainers, we want Codex to run efficiently in a wide range of
environments with minimal overhead. We also want to take advantage of
operating system-specific APIs to provide better sandboxing, where
possible.
To that end, we are moving forward with a Rust implementation of Codex
CLI contained in this folder, which has the following benefits:
- The CLI compiles to small, standalone, platform-specific binaries.
- Can make direct, native calls to
[seccomp](https://man7.org/linux/man-pages/man2/seccomp.2.html) and
[landlock](https://man7.org/linux/man-pages/man7/landlock.7.html) in
order to support sandboxing on Linux.
- No runtime garbage collection, resulting in lower memory consumption
and better, more predictable performance.
Currently, the Rust implementation is materially behind the TypeScript
implementation in functionality, so continue to use the TypeScript
implmentation for the time being. We will publish native executables via
GitHub Releases as soon as we feel the Rust version is usable.
2025-04-24 13:31:40 -07:00
|
|
|
}
|
|
|
|
|
|
2025-05-13 16:52:52 -07:00
|
|
|
fn load_instructions(codex_dir: Option<&Path>) -> Option<String> {
|
2025-10-15 17:46:01 +01:00
|
|
|
let base = codex_dir?;
|
|
|
|
|
for candidate in [LOCAL_PROJECT_DOC_FILENAME, DEFAULT_PROJECT_DOC_FILENAME] {
|
|
|
|
|
let mut path = base.to_path_buf();
|
|
|
|
|
path.push(candidate);
|
|
|
|
|
if let Ok(contents) = std::fs::read_to_string(&path) {
|
|
|
|
|
let trimmed = contents.trim();
|
|
|
|
|
if !trimmed.is_empty() {
|
|
|
|
|
return Some(trimmed.to_string());
|
|
|
|
|
}
|
2025-05-12 17:24:44 -07:00
|
|
|
}
|
2025-10-15 17:46:01 +01:00
|
|
|
}
|
|
|
|
|
None
|
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
As stated in `codex-rs/README.md`:
Today, Codex CLI is written in TypeScript and requires Node.js 22+ to
run it. For a number of users, this runtime requirement inhibits
adoption: they would be better served by a standalone executable. As
maintainers, we want Codex to run efficiently in a wide range of
environments with minimal overhead. We also want to take advantage of
operating system-specific APIs to provide better sandboxing, where
possible.
To that end, we are moving forward with a Rust implementation of Codex
CLI contained in this folder, which has the following benefits:
- The CLI compiles to small, standalone, platform-specific binaries.
- Can make direct, native calls to
[seccomp](https://man7.org/linux/man-pages/man2/seccomp.2.html) and
[landlock](https://man7.org/linux/man-pages/man7/landlock.7.html) in
order to support sandboxing on Linux.
- No runtime garbage collection, resulting in lower memory consumption
and better, more predictable performance.
Currently, the Rust implementation is materially behind the TypeScript
implementation in functionality, so continue to use the TypeScript
implmentation for the time being. We will publish native executables via
GitHub Releases as soon as we feel the Rust version is usable.
2025-04-24 13:31:40 -07:00
|
|
|
}
|
2025-07-22 09:42:22 -07:00
|
|
|
|
2025-07-29 10:06:05 -07:00
|
|
|
/// Reads an override for the built-in base instructions from `path`.
///
/// Relative paths are resolved against `cwd` so CLI overrides behave the same
/// regardless of where the process was launched from.
///
/// Returns `Ok(None)` when no path is provided. Returns an error when the
/// file cannot be read or contains only whitespace, with the offending path
/// included in the message.
fn get_base_instructions(
    path: Option<&PathBuf>,
    cwd: &Path,
) -> std::io::Result<Option<String>> {
    // `path` is already an `Option` of a reference; the previous
    // `path.as_ref()` produced a needless `&&PathBuf` (clippy: useless_asref).
    let Some(p) = path else {
        return Ok(None);
    };

    // Resolve relative paths against the provided cwd to make CLI
    // overrides consistent regardless of where the process was launched
    // from.
    let full_path = if p.is_relative() {
        cwd.join(p)
    } else {
        p.to_path_buf()
    };

    let contents = std::fs::read_to_string(&full_path).map_err(|e| {
        std::io::Error::new(
            e.kind(),
            format!(
                "failed to read experimental instructions file {}: {e}",
                full_path.display()
            ),
        )
    })?;

    let s = contents.trim().to_string();
    if s.is_empty() {
        // An explicitly configured but empty instructions file is almost
        // certainly a mistake, so surface it rather than silently ignoring it.
        Err(std::io::Error::new(
            std::io::ErrorKind::InvalidData,
            format!(
                "experimental instructions file is empty: {}",
                full_path.display()
            ),
        ))
    } else {
        Ok(Some(s))
    }
}
|
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
As stated in `codex-rs/README.md`:
Today, Codex CLI is written in TypeScript and requires Node.js 22+ to
run it. For a number of users, this runtime requirement inhibits
adoption: they would be better served by a standalone executable. As
maintainers, we want Codex to run efficiently in a wide range of
environments with minimal overhead. We also want to take advantage of
operating system-specific APIs to provide better sandboxing, where
possible.
To that end, we are moving forward with a Rust implementation of Codex
CLI contained in this folder, which has the following benefits:
- The CLI compiles to small, standalone, platform-specific binaries.
- Can make direct, native calls to
[seccomp](https://man7.org/linux/man-pages/man2/seccomp.2.html) and
[landlock](https://man7.org/linux/man-pages/man7/landlock.7.html) in
order to support sandboxing on Linux.
- No runtime garbage collection, resulting in lower memory consumption
and better, more predictable performance.
Currently, the Rust implementation is materially behind the TypeScript
implementation in functionality, so continue to use the TypeScript
implementation for the time being. We will publish native executables via
GitHub Releases as soon as we feel the Rust version is usable.
2025-04-24 13:31:40 -07:00
|
|
|
}
|
fix: write logs to ~/.codex/log instead of /tmp (#669)
Previously, the Rust TUI was writing log files to `/tmp`, which is
world-readable and not available on Windows, so that isn't great.
This PR tries to clean things up by adding a function that provides the
path to the "Codex config dir," e.g., `~/.codex` (though I suppose we
could support `$CODEX_HOME` to override this?) and then defines other
paths in terms of the result of `codex_dir()`.
For example, `log_dir()` returns the folder where log files should be
written which is defined in terms of `codex_dir()`. I updated the TUI to
use this function. On UNIX, we even go so far as to `chmod 600` the log
file by default, though as noted in a comment, it's a bit tedious to do
the equivalent on Windows, so we just let that go for now.
This also changes the default logging level to `info` for `codex_core`
and `codex_tui` when `RUST_LOG` is not specified. I'm not really sure if
we should use a more verbose default (it may be helpful when debugging
user issues), though if so, we should probably also set up log rotation?
2025-04-25 17:37:41 -07:00
|
|
|
|
2025-04-27 21:47:50 -07:00
|
|
|
fn default_model() -> String {
|
|
|
|
|
OPENAI_DEFAULT_MODEL.to_string()
|
|
|
|
|
}
|
|
|
|
|
|
Review Mode (Core) (#3401)
## 📝 Review Mode -- Core
This PR introduces the Core implementation for Review mode:
- New op `Op::Review { prompt: String }:` spawns a child review task
with isolated context, a review‑specific system prompt, and a
`Config.review_model`.
- `EnteredReviewMode`: emitted when the child review session starts.
Every event from this point onwards reflects the review session.
- `ExitedReviewMode(Option<ReviewOutputEvent>)`: emitted when the review
finishes or is interrupted, with optional structured findings:
```json
{
"findings": [
{
"title": "<≤ 80 chars, imperative>",
"body": "<valid Markdown explaining *why* this is a problem; cite files/lines/functions>",
"confidence_score": <float 0.0-1.0>,
"priority": <int 0-3>,
"code_location": {
"absolute_file_path": "<file path>",
"line_range": {"start": <int>, "end": <int>}
}
}
],
"overall_correctness": "patch is correct" | "patch is incorrect",
"overall_explanation": "<1-3 sentence explanation justifying the overall_correctness verdict>",
"overall_confidence_score": <float 0.0-1.0>
}
```
## Questions
### Why separate out its own message history?
We want the review thread to match the training of our review models as
much as possible -- that means using a custom prompt, removing user
instructions, and starting a clean chat history.
We also want to make sure the review thread doesn't leak into the parent
thread.
### Why do this as a mode, vs. sub-agents?
1. We want review to be a synchronous task, so it's fine for now to do a
bespoke implementation.
2. We're still unclear about the final structure for sub-agents. We'd
prefer to land this quickly and then refactor into sub-agents without
rushing that implementation.
2025-09-12 16:25:10 -07:00
|
|
|
fn default_review_model() -> String {
|
|
|
|
|
OPENAI_DEFAULT_REVIEW_MODEL.to_string()
|
|
|
|
|
}
|
|
|
|
|
|
2025-05-15 00:30:13 -07:00
|
|
|
/// Returns the path to the Codex configuration directory, which can be
|
|
|
|
|
/// specified by the `CODEX_HOME` environment variable. If not set, defaults to
|
|
|
|
|
/// `~/.codex`.
|
|
|
|
|
///
|
|
|
|
|
/// - If `CODEX_HOME` is set, the value will be canonicalized and this
|
|
|
|
|
/// function will Err if the path does not exist.
|
|
|
|
|
/// - If `CODEX_HOME` is not set, this function does not verify that the
|
|
|
|
|
/// directory exists.
|
2025-07-22 15:54:33 -07:00
|
|
|
pub fn find_codex_home() -> std::io::Result<PathBuf> {
|
2025-05-15 00:30:13 -07:00
|
|
|
// Honor the `CODEX_HOME` environment variable when it is set to allow users
|
|
|
|
|
// (and tests) to override the default location.
|
2025-08-19 13:22:02 -07:00
|
|
|
if let Ok(val) = std::env::var("CODEX_HOME")
|
|
|
|
|
&& !val.is_empty()
|
|
|
|
|
{
|
|
|
|
|
return PathBuf::from(val).canonicalize();
|
2025-05-15 00:30:13 -07:00
|
|
|
}
|
|
|
|
|
|
fix: write logs to ~/.codex/log instead of /tmp (#669)
Previously, the Rust TUI was writing log files to `/tmp`, which is
world-readable and not available on Windows, so that isn't great.
This PR tries to clean things up by adding a function that provides the
path to the "Codex config dir," e.g., `~/.codex` (though I suppose we
could support `$CODEX_HOME` to override this?) and then defines other
paths in terms of the result of `codex_dir()`.
For example, `log_dir()` returns the folder where log files should be
written which is defined in terms of `codex_dir()`. I updated the TUI to
use this function. On UNIX, we even go so far as to `chmod 600` the log
file by default, though as noted in a comment, it's a bit tedious to do
the equivalent on Windows, so we just let that go for now.
This also changes the default logging level to `info` for `codex_core`
and `codex_tui` when `RUST_LOG` is not specified. I'm not really sure if
we should use a more verbose default (it may be helpful when debugging
user issues), though if so, we should probably also set up log rotation?
2025-04-25 17:37:41 -07:00
|
|
|
let mut p = home_dir().ok_or_else(|| {
|
|
|
|
|
std::io::Error::new(
|
|
|
|
|
std::io::ErrorKind::NotFound,
|
|
|
|
|
"Could not find home directory",
|
|
|
|
|
)
|
|
|
|
|
})?;
|
|
|
|
|
p.push(".codex");
|
|
|
|
|
Ok(p)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Returns the path to the folder where Codex logs are stored. Does not verify
|
|
|
|
|
/// that the directory exists.
|
2025-05-15 00:30:13 -07:00
|
|
|
pub fn log_dir(cfg: &Config) -> std::io::Result<PathBuf> {
|
|
|
|
|
let mut p = cfg.codex_home.clone();
|
fix: write logs to ~/.codex/log instead of /tmp (#669)
Previously, the Rust TUI was writing log files to `/tmp`, which is
world-readable and not available on Windows, so that isn't great.
This PR tries to clean things up by adding a function that provides the
path to the "Codex config dir," e.g., `~/.codex` (though I suppose we
could support `$CODEX_HOME` to override this?) and then defines other
paths in terms of the result of `codex_dir()`.
For example, `log_dir()` returns the folder where log files should be
written which is defined in terms of `codex_dir()`. I updated the TUI to
use this function. On UNIX, we even go so far as to `chmod 600` the log
file by default, though as noted in a comment, it's a bit tedious to do
the equivalent on Windows, so we just let that go for now.
This also changes the default logging level to `info` for `codex_core`
and `codex_tui` when `RUST_LOG` is not specified. I'm not really sure if
we should use a more verbose default (it may be helpful when debugging
user issues), though if so, we should probably also set up log rotation?
2025-04-25 17:37:41 -07:00
|
|
|
p.push("log");
|
|
|
|
|
Ok(p)
|
|
|
|
|
}
|
2025-04-29 18:42:52 -07:00
|
|
|
|
|
|
|
|
#[cfg(test)]
|
|
|
|
|
mod tests {
|
2025-10-30 10:28:32 +00:00
|
|
|
use crate::config::edit::ConfigEdit;
|
|
|
|
|
use crate::config::edit::ConfigEditsBuilder;
|
|
|
|
|
use crate::config::edit::apply_blocking;
|
|
|
|
|
use crate::config::types::HistoryPersistence;
|
|
|
|
|
use crate::config::types::McpServerTransportConfig;
|
|
|
|
|
use crate::config::types::Notifications;
|
2025-10-14 18:50:00 +01:00
|
|
|
use crate::features::Feature;
|
2025-05-20 11:55:25 -07:00
|
|
|
|
2025-04-29 18:42:52 -07:00
|
|
|
use super::*;
|
2025-05-13 16:52:52 -07:00
|
|
|
use pretty_assertions::assert_eq;
|
2025-09-11 15:04:29 -07:00
|
|
|
|
2025-09-22 10:30:59 -07:00
|
|
|
use std::time::Duration;
|
2025-05-13 16:52:52 -07:00
|
|
|
use tempfile::TempDir;
|
2025-04-29 18:42:52 -07:00
|
|
|
|
feat: record messages from user in ~/.codex/history.jsonl (#939)
This is a large change to support a "history" feature like you would
expect in a shell like Bash.
History events are recorded in `$CODEX_HOME/history.jsonl`. Because it
is a JSONL file, it is straightforward to append new entries (as opposed
to the TypeScript file that uses `$CODEX_HOME/history.json`, so to be
valid JSON, each new entry entails rewriting the entire file). Because
it is possible for there to be multiple instances of Codex CLI writing
to `history.jsonl` at once, we use advisory file locking when working
with `history.jsonl` in `codex-rs/core/src/message_history.rs`.
Because we believe history is a sufficiently useful feature, we enable
it by default. Though to provide some safety, we set the file
permissions of `history.jsonl` to be `o600` so that other users on the
system cannot read the user's history. We do not yet support a default
list of `SENSITIVE_PATTERNS` as the TypeScript CLI does:
https://github.com/openai/codex/blob/3fdf9df1335ac9501e3fb0e61715359145711e8b/codex-cli/src/utils/storage/command-history.ts#L10-L17
We are going to take a more conservative approach to this list in the
Rust CLI. For example, while `/\b[A-Za-z0-9-_]{20,}\b/` might exclude
sensitive information like API tokens, it would also exclude valuable
information such as references to Git commits.
As noted in the updated documentation, users can opt-out of history by
adding the following to `config.toml`:
```toml
[history]
persistence = "none"
```
Because `history.jsonl` could, in theory, be quite large, we take a[n
arguably overly pedantic] approach in reading history entries into
memory. Specifically, we start by telling the client the current number
of entries in the history file (`history_entry_count`) as well as the
inode (`history_log_id`) of `history.jsonl` (see the new fields on
`SessionConfiguredEvent`).
The client is responsible for keeping new entries in memory to create a
"local history," but if the user hits up enough times to go "past" the
end of local history, then the client should use the new
`GetHistoryEntryRequest` in the protocol to fetch older entries.
Specifically, it should pass the `history_log_id` it was given
originally and work backwards from `history_entry_count`. (It should
really fetch history in batches rather than one-at-a-time, but that is
something we can improve upon in subsequent PRs.)
The motivation behind this crazy scheme is that it is designed to defend
against:
* The `history.jsonl` being truncated during the session such that the
index into the history is no longer consistent with what had been read
up to that point. We do not yet have logic to enforce a `max_bytes` for
`history.jsonl`, but once we do, we will aspire to implement it in a way
that should result in a new inode for the file on most systems.
* New items from concurrent Codex CLI sessions appending to the history.
Because, in absence of truncation, `history.jsonl` is an append-only
log, so long as the client reads backwards from `history_entry_count`,
it should always get a consistent view of history. (That said, it will
not be able to read _new_ commands from concurrent sessions, but perhaps
we will introduce a `/` command to reload latest history or something
down the road.)
Admittedly, my testing of this feature thus far has been fairly light. I
expect we will find bugs and introduce enhancements/fixes going forward.
2025-05-15 16:26:23 -07:00
|
|
|
#[test]
fn test_toml_parsing() {
    // `persistence = "save-all"` should deserialize to the SaveAll variant,
    // with `max_bytes` left unset (None) because it is omitted from the TOML.
    let history_with_persistence = r#"
[history]
persistence = "save-all"
"#;
    let history_with_persistence_cfg = toml::from_str::<ConfigToml>(history_with_persistence)
        .expect("TOML deserialization should succeed");
    assert_eq!(
        Some(History {
            persistence: HistoryPersistence::SaveAll,
            max_bytes: None,
        }),
        history_with_persistence_cfg.history
    );

    // `persistence = "none"` is the documented opt-out for history recording;
    // verify it maps to HistoryPersistence::None.
    let history_no_persistence = r#"
[history]
persistence = "none"
"#;
    let history_no_persistence_cfg = toml::from_str::<ConfigToml>(history_no_persistence)
        .expect("TOML deserialization should succeed");
    assert_eq!(
        Some(History {
            persistence: HistoryPersistence::None,
            max_bytes: None,
        }),
        history_no_persistence_cfg.history
    );
}
|
|
|
|
|
|
2025-09-18 11:25:09 -07:00
|
|
|
#[test]
fn tui_config_missing_notifications_field_defaults_to_disabled() {
    // A `[tui]` table with no `notifications` key at all: the section itself
    // must still parse, and the missing field must default to disabled
    // (Notifications::Enabled(false)) rather than failing deserialization.
    let cfg = r#"
[tui]
"#;
    let parsed = toml::from_str::<ConfigToml>(cfg)
        .expect("TUI config without notifications should succeed");
    let tui = parsed.tui.expect("config should include tui section");
    assert_eq!(tui.notifications, Notifications::Enabled(false));
}
|
|
|
|
|
|
2025-04-29 18:42:52 -07:00
|
|
|
#[test]
|
feat: redesign sandbox config (#1373)
This is a major redesign of how sandbox configuration works and aims to
fix https://github.com/openai/codex/issues/1248. Specifically, it
replaces `sandbox_permissions` in `config.toml` (and the
`-s`/`--sandbox-permission` CLI flags) with a "table" with effectively
three variants:
```toml
# Safest option: full disk is read-only, but writes and network access are disallowed.
[sandbox]
mode = "read-only"
# The cwd of the Codex task is writable, as well as $TMPDIR on macOS.
# writable_roots can be used to specify additional writable folders.
[sandbox]
mode = "workspace-write"
writable_roots = [] # Optional, defaults to the empty list.
network_access = false # Optional, defaults to false.
# Disable sandboxing: use at your own risk!!!
[sandbox]
mode = "danger-full-access"
```
This should make sandboxing easier to reason about. While we have
dropped support for `-s`, the way it works now is:
- no flags => `read-only`
- `--full-auto` => `workspace-write`
- currently, there is no way to specify `danger-full-access` via a CLI
flag, but we will revisit that as part of
https://github.com/openai/codex/issues/1254
Outstanding issue:
- As noted in the `TODO` on `SandboxPolicy::is_unrestricted()`, we are
still conflating sandbox preferences with approval preferences in that
case, which needs to be cleaned up.
2025-06-24 16:59:47 -07:00
|
|
|
fn test_sandbox_config_parsing() {
|
|
|
|
|
let sandbox_full_access = r#"
|
feat: add support for --sandbox flag (#1476)
On a high-level, we try to design `config.toml` so that you don't have
to "comment out a lot of stuff" when testing different options.
Previously, defining a sandbox policy was somewhat at odds with this
principle because you would define the policy as attributes of
`[sandbox]` like so:
```toml
[sandbox]
mode = "workspace-write"
writable_roots = [ "/tmp" ]
```
but if you wanted to temporarily change to a read-only sandbox, you
might feel compelled to modify your file to be:
```toml
[sandbox]
mode = "read-only"
# mode = "workspace-write"
# writable_roots = [ "/tmp" ]
```
Technically, commenting out `writable_roots` would not be strictly
necessary, as `mode = "read-only"` would ignore `writable_roots`, but
it's still a reasonable thing to do to keep things tidy.
Currently, the various values for `mode` do not support that many
attributes, so this is not that hard to maintain, but one could imagine
this becoming more complex in the future.
In this PR, we change Codex CLI so that it no longer recognizes
`[sandbox]`. Instead, it introduces a top-level option, `sandbox_mode`,
and `[sandbox_workspace_write]` is used to further configure the sandbox
when `sandbox_mode = "workspace-write"` is used:
```toml
sandbox_mode = "workspace-write"
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
```
This feels a bit more future-proof in that it is less tedious to
configure different sandboxes:
```toml
sandbox_mode = "workspace-write"
[sandbox_read_only]
# read-only options here...
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
[sandbox_danger_full_access]
# danger-full-access options here...
```
In this scheme, you never need to comment out the configuration for an
individual sandbox type: you only need to redefine `sandbox_mode`.
Relatedly, previous to this change, a user had to do `-c
sandbox.mode=read-only` to change the mode on the command line. With
this change, things are arguably a bit cleaner because the equivalent
option is `-c sandbox_mode=read-only` (and now `-c
sandbox_workspace_write=...` can be set separately).
Though more importantly, we introduce the `-s/--sandbox` option to the
CLI, which maps directly to `sandbox_mode` in `config.toml`, making
config override behavior easier to reason about. Moreover, as you can
see in the updates to the various Markdown files, it is much easier to
explain how to configure sandboxing when things like `--sandbox
read-only` can be used as an example.
Relatedly, this cleanup also made it straightforward to add support for
a `sandbox` option for Codex when used as an MCP server (see the changes
to `mcp-server/src/codex_tool_config.rs`).
Fixes https://github.com/openai/codex/issues/1248.
2025-07-07 22:31:30 -07:00
|
|
|
sandbox_mode = "danger-full-access"
|
|
|
|
|
|
|
|
|
|
[sandbox_workspace_write]
|
feat: redesign sandbox config (#1373)
This is a major redesign of how sandbox configuration works and aims to
fix https://github.com/openai/codex/issues/1248. Specifically, it
replaces `sandbox_permissions` in `config.toml` (and the
`-s`/`--sandbox-permission` CLI flags) with a "table" with effectively
three variants:
```toml
# Safest option: full disk is read-only, but writes and network access are disallowed.
[sandbox]
mode = "read-only"
# The cwd of the Codex task is writable, as well as $TMPDIR on macOS.
# writable_roots can be used to specify additional writable folders.
[sandbox]
mode = "workspace-write"
writable_roots = [] # Optional, defaults to the empty list.
network_access = false # Optional, defaults to false.
# Disable sandboxing: use at your own risk!!!
[sandbox]
mode = "danger-full-access"
```
This should make sandboxing easier to reason about. While we have
dropped support for `-s`, the way it works now is:
- no flags => `read-only`
- `--full-auto` => `workspace-write`
- currently, there is no way to specify `danger-full-access` via a CLI
flag, but we will revisit that as part of
https://github.com/openai/codex/issues/1254
Outstanding issue:
- As noted in the `TODO` on `SandboxPolicy::is_unrestricted()`, we are
still conflating sandbox preferences with approval preferences in that
case, which needs to be cleaned up.
2025-06-24 16:59:47 -07:00
|
|
|
network_access = false # This should be ignored.
|
|
|
|
|
"#;
|
|
|
|
|
let sandbox_full_access_cfg = toml::from_str::<ConfigToml>(sandbox_full_access)
|
|
|
|
|
.expect("TOML deserialization should succeed");
|
feat: add support for --sandbox flag (#1476)
On a high-level, we try to design `config.toml` so that you don't have
to "comment out a lot of stuff" when testing different options.
Previously, defining a sandbox policy was somewhat at odds with this
principle because you would define the policy as attributes of
`[sandbox]` like so:
```toml
[sandbox]
mode = "workspace-write"
writable_roots = [ "/tmp" ]
```
but if you wanted to temporarily change to a read-only sandbox, you
might feel compelled to modify your file to be:
```toml
[sandbox]
mode = "read-only"
# mode = "workspace-write"
# writable_roots = [ "/tmp" ]
```
Technically, commenting out `writable_roots` would not be strictly
necessary, as `mode = "read-only"` would ignore `writable_roots`, but
it's still a reasonable thing to do to keep things tidy.
Currently, the various values for `mode` do not support that many
attributes, so this is not that hard to maintain, but one could imagine
this becoming more complex in the future.
In this PR, we change Codex CLI so that it no longer recognizes
`[sandbox]`. Instead, it introduces a top-level option, `sandbox_mode`,
and `[sandbox_workspace_write]` is used to further configure the sandbox
when `sandbox_mode = "workspace-write"` is used:
```toml
sandbox_mode = "workspace-write"
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
```
This feels a bit more future-proof in that it is less tedious to
configure different sandboxes:
```toml
sandbox_mode = "workspace-write"
[sandbox_read_only]
# read-only options here...
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
[sandbox_danger_full_access]
# danger-full-access options here...
```
In this scheme, you never need to comment out the configuration for an
individual sandbox type: you only need to redefine `sandbox_mode`.
Relatedly, previous to this change, a user had to do `-c
sandbox.mode=read-only` to change the mode on the command line. With
this change, things are arguably a bit cleaner because the equivalent
option is `-c sandbox_mode=read-only` (and now `-c
sandbox_workspace_write=...` can be set separately).
Though more importantly, we introduce the `-s/--sandbox` option to the
CLI, which maps directly to `sandbox_mode` in `config.toml`, making
config override behavior easier to reason about. Moreover, as you can
see in the updates to the various Markdown files, it is much easier to
explain how to configure sandboxing when things like `--sandbox
read-only` can be used as an example.
Relatedly, this cleanup also made it straightforward to add support for
a `sandbox` option for Codex when used as an MCP server (see the changes
to `mcp-server/src/codex_tool_config.rs`).
Fixes https://github.com/openai/codex/issues/1248.
2025-07-07 22:31:30 -07:00
|
|
|
let sandbox_mode_override = None;
|
2025-10-27 18:19:32 -07:00
|
|
|
let resolution = sandbox_full_access_cfg.derive_sandbox_policy(
|
|
|
|
|
sandbox_mode_override,
|
|
|
|
|
None,
|
|
|
|
|
&PathBuf::from("/tmp/test"),
|
|
|
|
|
);
|
feat: redesign sandbox config (#1373)
This is a major redesign of how sandbox configuration works and aims to
fix https://github.com/openai/codex/issues/1248. Specifically, it
replaces `sandbox_permissions` in `config.toml` (and the
`-s`/`--sandbox-permission` CLI flags) with a "table" with effectively
three variants:
```toml
# Safest option: full disk is read-only, but writes and network access are disallowed.
[sandbox]
mode = "read-only"
# The cwd of the Codex task is writable, as well as $TMPDIR on macOS.
# writable_roots can be used to specify additional writable folders.
[sandbox]
mode = "workspace-write"
writable_roots = [] # Optional, defaults to the empty list.
network_access = false # Optional, defaults to false.
# Disable sandboxing: use at your own risk!!!
[sandbox]
mode = "danger-full-access"
```
This should make sandboxing easier to reason about. While we have
dropped support for `-s`, the way it works now is:
- no flags => `read-only`
- `--full-auto` => `workspace-write`
- currently, there is no way to specify `danger-full-access` via a CLI
flag, but we will revisit that as part of
https://github.com/openai/codex/issues/1254
Outstanding issue:
- As noted in the `TODO` on `SandboxPolicy::is_unrestricted()`, we are
still conflating sandbox preferences with approval preferences in that
case, which needs to be cleaned up.
2025-06-24 16:59:47 -07:00
|
|
|
assert_eq!(
|
2025-10-27 18:19:32 -07:00
|
|
|
resolution,
|
|
|
|
|
SandboxPolicyResolution {
|
|
|
|
|
policy: SandboxPolicy::DangerFullAccess,
|
|
|
|
|
forced_auto_mode_downgraded_on_windows: false,
|
|
|
|
|
}
|
feat: redesign sandbox config (#1373)
This is a major redesign of how sandbox configuration works and aims to
fix https://github.com/openai/codex/issues/1248. Specifically, it
replaces `sandbox_permissions` in `config.toml` (and the
`-s`/`--sandbox-permission` CLI flags) with a "table" with effectively
three variants:
```toml
# Safest option: full disk is read-only, but writes and network access are disallowed.
[sandbox]
mode = "read-only"
# The cwd of the Codex task is writable, as well as $TMPDIR on macOS.
# writable_roots can be used to specify additional writable folders.
[sandbox]
mode = "workspace-write"
writable_roots = [] # Optional, defaults to the empty list.
network_access = false # Optional, defaults to false.
# Disable sandboxing: use at your own risk!!!
[sandbox]
mode = "danger-full-access"
```
This should make sandboxing easier to reason about. While we have
dropped support for `-s`, the way it works now is:
- no flags => `read-only`
- `--full-auto` => `workspace-write`
- currently, there is no way to specify `danger-full-access` via a CLI
flag, but we will revisit that as part of
https://github.com/openai/codex/issues/1254
Outstanding issue:
- As noted in the `TODO` on `SandboxPolicy::is_unrestricted()`, we are
still conflating sandbox preferences with approval preferences in that
case, which needs to be cleaned up.
2025-06-24 16:59:47 -07:00
|
|
|
);
|
2025-04-29 18:42:52 -07:00
|
|
|
|
feat: redesign sandbox config (#1373)
This is a major redesign of how sandbox configuration works and aims to
fix https://github.com/openai/codex/issues/1248. Specifically, it
replaces `sandbox_permissions` in `config.toml` (and the
`-s`/`--sandbox-permission` CLI flags) with a "table" with effectively
three variants:
```toml
# Safest option: full disk is read-only, but writes and network access are disallowed.
[sandbox]
mode = "read-only"
# The cwd of the Codex task is writable, as well as $TMPDIR on macOS.
# writable_roots can be used to specify additional writable folders.
[sandbox]
mode = "workspace-write"
writable_roots = [] # Optional, defaults to the empty list.
network_access = false # Optional, defaults to false.
# Disable sandboxing: use at your own risk!!!
[sandbox]
mode = "danger-full-access"
```
This should make sandboxing easier to reason about. While we have
dropped support for `-s`, the way it works now is:
- no flags => `read-only`
- `--full-auto` => `workspace-write`
- currently, there is no way to specify `danger-full-access` via a CLI
flag, but we will revisit that as part of
https://github.com/openai/codex/issues/1254
Outstanding issue:
- As noted in the `TODO` on `SandboxPolicy::is_unrestricted()`, we are
still conflating sandbox preferences with approval preferences in that
case, which needs to be cleaned up.
2025-06-24 16:59:47 -07:00
|
|
|
let sandbox_read_only = r#"
|
feat: add support for --sandbox flag (#1476)
On a high-level, we try to design `config.toml` so that you don't have
to "comment out a lot of stuff" when testing different options.
Previously, defining a sandbox policy was somewhat at odds with this
principle because you would define the policy as attributes of
`[sandbox]` like so:
```toml
[sandbox]
mode = "workspace-write"
writable_roots = [ "/tmp" ]
```
but if you wanted to temporarily change to a read-only sandbox, you
might feel compelled to modify your file to be:
```toml
[sandbox]
mode = "read-only"
# mode = "workspace-write"
# writable_roots = [ "/tmp" ]
```
Technically, commenting out `writable_roots` would not be strictly
necessary, as `mode = "read-only"` would ignore `writable_roots`, but
it's still a reasonable thing to do to keep things tidy.
Currently, the various values for `mode` do not support that many
attributes, so this is not that hard to maintain, but one could imagine
this becoming more complex in the future.
In this PR, we change Codex CLI so that it no longer recognizes
`[sandbox]`. Instead, it introduces a top-level option, `sandbox_mode`,
and `[sandbox_workspace_write]` is used to further configure the sandbox
when `sandbox_mode = "workspace-write"` is used:
```toml
sandbox_mode = "workspace-write"
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
```
This feels a bit more future-proof in that it is less tedious to
configure different sandboxes:
```toml
sandbox_mode = "workspace-write"
[sandbox_read_only]
# read-only options here...
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
[sandbox_danger_full_access]
# danger-full-access options here...
```
In this scheme, you never need to comment out the configuration for an
individual sandbox type: you only need to redefine `sandbox_mode`.
Relatedly, previous to this change, a user had to do `-c
sandbox.mode=read-only` to change the mode on the command line. With
this change, things are arguably a bit cleaner because the equivalent
option is `-c sandbox_mode=read-only` (and now `-c
sandbox_workspace_write=...` can be set separately).
Though more importantly, we introduce the `-s/--sandbox` option to the
CLI, which maps directly to `sandbox_mode` in `config.toml`, making
config override behavior easier to reason about. Moreover, as you can
see in the updates to the various Markdown files, it is much easier to
explain how to configure sandboxing when things like `--sandbox
read-only` can be used as an example.
Relatedly, this cleanup also made it straightforward to add support for
a `sandbox` option for Codex when used as an MCP server (see the changes
to `mcp-server/src/codex_tool_config.rs`).
Fixes https://github.com/openai/codex/issues/1248.
2025-07-07 22:31:30 -07:00
|
|
|
sandbox_mode = "read-only"
|
|
|
|
|
|
|
|
|
|
[sandbox_workspace_write]
|
feat: redesign sandbox config (#1373)
This is a major redesign of how sandbox configuration works and aims to
fix https://github.com/openai/codex/issues/1248. Specifically, it
replaces `sandbox_permissions` in `config.toml` (and the
`-s`/`--sandbox-permission` CLI flags) with a "table" with effectively
three variants:
```toml
# Safest option: full disk is read-only, but writes and network access are disallowed.
[sandbox]
mode = "read-only"
# The cwd of the Codex task is writable, as well as $TMPDIR on macOS.
# writable_roots can be used to specify additional writable folders.
[sandbox]
mode = "workspace-write"
writable_roots = [] # Optional, defaults to the empty list.
network_access = false # Optional, defaults to false.
# Disable sandboxing: use at your own risk!!!
[sandbox]
mode = "danger-full-access"
```
This should make sandboxing easier to reason about. While we have
dropped support for `-s`, the way it works now is:
- no flags => `read-only`
- `--full-auto` => `workspace-write`
- currently, there is no way to specify `danger-full-access` via a CLI
flag, but we will revisit that as part of
https://github.com/openai/codex/issues/1254
Outstanding issue:
- As noted in the `TODO` on `SandboxPolicy::is_unrestricted()`, we are
still conflating sandbox preferences with approval preferences in that
case, which needs to be cleaned up.
2025-06-24 16:59:47 -07:00
|
|
|
network_access = true # This should be ignored.
|
|
|
|
|
"#;
|
|
|
|
|
|
|
|
|
|
let sandbox_read_only_cfg = toml::from_str::<ConfigToml>(sandbox_read_only)
|
|
|
|
|
.expect("TOML deserialization should succeed");
|
feat: add support for --sandbox flag (#1476)
On a high-level, we try to design `config.toml` so that you don't have
to "comment out a lot of stuff" when testing different options.
Previously, defining a sandbox policy was somewhat at odds with this
principle because you would define the policy as attributes of
`[sandbox]` like so:
```toml
[sandbox]
mode = "workspace-write"
writable_roots = [ "/tmp" ]
```
but if you wanted to temporarily change to a read-only sandbox, you
might feel compelled to modify your file to be:
```toml
[sandbox]
mode = "read-only"
# mode = "workspace-write"
# writable_roots = [ "/tmp" ]
```
Technically, commenting out `writable_roots` would not be strictly
necessary, as `mode = "read-only"` would ignore `writable_roots`, but
it's still a reasonable thing to do to keep things tidy.
Currently, the various values for `mode` do not support that many
attributes, so this is not that hard to maintain, but one could imagine
this becoming more complex in the future.
In this PR, we change Codex CLI so that it no longer recognizes
`[sandbox]`. Instead, it introduces a top-level option, `sandbox_mode`,
and `[sandbox_workspace_write]` is used to further configure the sandbox
when `sandbox_mode = "workspace-write"` is used:
```toml
sandbox_mode = "workspace-write"
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
```
This feels a bit more future-proof in that it is less tedious to
configure different sandboxes:
```toml
sandbox_mode = "workspace-write"
[sandbox_read_only]
# read-only options here...
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
[sandbox_danger_full_access]
# danger-full-access options here...
```
In this scheme, you never need to comment out the configuration for an
individual sandbox type: you only need to redefine `sandbox_mode`.
Relatedly, previous to this change, a user had to do `-c
sandbox.mode=read-only` to change the mode on the command line. With
this change, things are arguably a bit cleaner because the equivalent
option is `-c sandbox_mode=read-only` (and now `-c
sandbox_workspace_write=...` can be set separately).
Though more importantly, we introduce the `-s/--sandbox` option to the
CLI, which maps directly to `sandbox_mode` in `config.toml`, making
config override behavior easier to reason about. Moreover, as you can
see in the updates to the various Markdown files, it is much easier to
explain how to configure sandboxing when things like `--sandbox
read-only` can be used as an example.
Relatedly, this cleanup also made it straightforward to add support for
a `sandbox` option for Codex when used as an MCP server (see the changes
to `mcp-server/src/codex_tool_config.rs`).
Fixes https://github.com/openai/codex/issues/1248.
2025-07-07 22:31:30 -07:00
|
|
|
let sandbox_mode_override = None;
|
2025-10-27 18:19:32 -07:00
|
|
|
let resolution = sandbox_read_only_cfg.derive_sandbox_policy(
|
|
|
|
|
sandbox_mode_override,
|
|
|
|
|
None,
|
|
|
|
|
&PathBuf::from("/tmp/test"),
|
|
|
|
|
);
|
feat: add support for --sandbox flag (#1476)
On a high-level, we try to design `config.toml` so that you don't have
to "comment out a lot of stuff" when testing different options.
Previously, defining a sandbox policy was somewhat at odds with this
principle because you would define the policy as attributes of
`[sandbox]` like so:
```toml
[sandbox]
mode = "workspace-write"
writable_roots = [ "/tmp" ]
```
but if you wanted to temporarily change to a read-only sandbox, you
might feel compelled to modify your file to be:
```toml
[sandbox]
mode = "read-only"
# mode = "workspace-write"
# writable_roots = [ "/tmp" ]
```
Technically, commenting out `writable_roots` would not be strictly
necessary, as `mode = "read-only"` would ignore `writable_roots`, but
it's still a reasonable thing to do to keep things tidy.
Currently, the various values for `mode` do not support that many
attributes, so this is not that hard to maintain, but one could imagine
this becoming more complex in the future.
In this PR, we change Codex CLI so that it no longer recognizes
`[sandbox]`. Instead, it introduces a top-level option, `sandbox_mode`,
and `[sandbox_workspace_write]` is used to further configure the sandbox
when `sandbox_mode = "workspace-write"` is used:
```toml
sandbox_mode = "workspace-write"
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
```
This feels a bit more future-proof in that it is less tedious to
configure different sandboxes:
```toml
sandbox_mode = "workspace-write"
[sandbox_read_only]
# read-only options here...
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
[sandbox_danger_full_access]
# danger-full-access options here...
```
In this scheme, you never need to comment out the configuration for an
individual sandbox type: you only need to redefine `sandbox_mode`.
Relatedly, previous to this change, a user had to do `-c
sandbox.mode=read-only` to change the mode on the command line. With
this change, things are arguably a bit cleaner because the equivalent
option is `-c sandbox_mode=read-only` (and now `-c
sandbox_workspace_write=...` can be set separately).
Though more importantly, we introduce the `-s/--sandbox` option to the
CLI, which maps directly to `sandbox_mode` in `config.toml`, making
config override behavior easier to reason about. Moreover, as you can
see in the updates to the various Markdown files, it is much easier to
explain how to configure sandboxing when things like `--sandbox
read-only` can be used as an example.
Relatedly, this cleanup also made it straightforward to add support for
a `sandbox` option for Codex when used as an MCP server (see the changes
to `mcp-server/src/codex_tool_config.rs`).
Fixes https://github.com/openai/codex/issues/1248.
2025-07-07 22:31:30 -07:00
|
|
|
assert_eq!(
|
2025-10-27 18:19:32 -07:00
|
|
|
resolution,
|
|
|
|
|
SandboxPolicyResolution {
|
|
|
|
|
policy: SandboxPolicy::ReadOnly,
|
|
|
|
|
forced_auto_mode_downgraded_on_windows: false,
|
|
|
|
|
}
|
2025-10-16 11:23:38 -07:00
|
|
|
);
|
|
|
|
|
|
|
|
|
|
let sandbox_workspace_write = r#"
|
|
|
|
|
sandbox_mode = "workspace-write"
|
|
|
|
|
|
|
|
|
|
[sandbox_workspace_write]
|
|
|
|
|
writable_roots = [
|
|
|
|
|
"/my/workspace",
|
|
|
|
|
]
|
|
|
|
|
exclude_tmpdir_env_var = true
|
|
|
|
|
exclude_slash_tmp = true
|
|
|
|
|
"#;
|
|
|
|
|
|
|
|
|
|
let sandbox_workspace_write_cfg = toml::from_str::<ConfigToml>(sandbox_workspace_write)
|
|
|
|
|
.expect("TOML deserialization should succeed");
|
|
|
|
|
let sandbox_mode_override = None;
|
2025-10-27 18:19:32 -07:00
|
|
|
let resolution = sandbox_workspace_write_cfg.derive_sandbox_policy(
|
|
|
|
|
sandbox_mode_override,
|
|
|
|
|
None,
|
|
|
|
|
&PathBuf::from("/tmp/test"),
|
feat: add support for --sandbox flag (#1476)
On a high-level, we try to design `config.toml` so that you don't have
to "comment out a lot of stuff" when testing different options.
Previously, defining a sandbox policy was somewhat at odds with this
principle because you would define the policy as attributes of
`[sandbox]` like so:
```toml
[sandbox]
mode = "workspace-write"
writable_roots = [ "/tmp" ]
```
but if you wanted to temporarily change to a read-only sandbox, you
might feel compelled to modify your file to be:
```toml
[sandbox]
mode = "read-only"
# mode = "workspace-write"
# writable_roots = [ "/tmp" ]
```
Technically, commenting out `writable_roots` would not be strictly
necessary, as `mode = "read-only"` would ignore `writable_roots`, but
it's still a reasonable thing to do to keep things tidy.
Currently, the various values for `mode` do not support that many
attributes, so this is not that hard to maintain, but one could imagine
this becoming more complex in the future.
In this PR, we change Codex CLI so that it no longer recognizes
`[sandbox]`. Instead, it introduces a top-level option, `sandbox_mode`,
and `[sandbox_workspace_write]` is used to further configure the sandbox
when `sandbox_mode = "workspace-write"` is used:
```toml
sandbox_mode = "workspace-write"
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
```
This feels a bit more future-proof in that it is less tedious to
configure different sandboxes:
```toml
sandbox_mode = "workspace-write"
[sandbox_read_only]
# read-only options here...
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
[sandbox_danger_full_access]
# danger-full-access options here...
```
In this scheme, you never need to comment out the configuration for an
individual sandbox type: you only need to redefine `sandbox_mode`.
Relatedly, previous to this change, a user had to do `-c
sandbox.mode=read-only` to change the mode on the command line. With
this change, things are arguably a bit cleaner because the equivalent
option is `-c sandbox_mode=read-only` (and now `-c
sandbox_workspace_write=...` can be set separately).
Though more importantly, we introduce the `-s/--sandbox` option to the
CLI, which maps directly to `sandbox_mode` in `config.toml`, making
config override behavior easier to reason about. Moreover, as you can
see in the updates to the various Markdown files, it is much easier to
explain how to configure sandboxing when things like `--sandbox
read-only` can be used as an example.
Relatedly, this cleanup also made it straightforward to add support for
a `sandbox` option for Codex when used as an MCP server (see the changes
to `mcp-server/src/codex_tool_config.rs`).
Fixes https://github.com/openai/codex/issues/1248.
2025-07-07 22:31:30 -07:00
|
|
|
);
|
2025-10-27 18:19:32 -07:00
|
|
|
if cfg!(target_os = "windows") {
|
|
|
|
|
assert_eq!(
|
|
|
|
|
resolution,
|
|
|
|
|
SandboxPolicyResolution {
|
|
|
|
|
policy: SandboxPolicy::ReadOnly,
|
|
|
|
|
forced_auto_mode_downgraded_on_windows: true,
|
|
|
|
|
}
|
|
|
|
|
);
|
|
|
|
|
} else {
|
|
|
|
|
assert_eq!(
|
|
|
|
|
resolution,
|
|
|
|
|
SandboxPolicyResolution {
|
|
|
|
|
policy: SandboxPolicy::WorkspaceWrite {
|
|
|
|
|
writable_roots: vec![PathBuf::from("/my/workspace")],
|
|
|
|
|
network_access: false,
|
|
|
|
|
exclude_tmpdir_env_var: true,
|
|
|
|
|
exclude_slash_tmp: true,
|
|
|
|
|
},
|
|
|
|
|
forced_auto_mode_downgraded_on_windows: false,
|
|
|
|
|
}
|
|
|
|
|
);
|
|
|
|
|
}
|
feat: redesign sandbox config (#1373)
This is a major redesign of how sandbox configuration works and aims to
fix https://github.com/openai/codex/issues/1248. Specifically, it
replaces `sandbox_permissions` in `config.toml` (and the
`-s`/`--sandbox-permission` CLI flags) with a "table" with effectively
three variants:
```toml
# Safest option: full disk is read-only, but writes and network access are disallowed.
[sandbox]
mode = "read-only"
# The cwd of the Codex task is writable, as well as $TMPDIR on macOS.
# writable_roots can be used to specify additional writable folders.
[sandbox]
mode = "workspace-write"
writable_roots = [] # Optional, defaults to the empty list.
network_access = false # Optional, defaults to false.
# Disable sandboxing: use at your own risk!!!
[sandbox]
mode = "danger-full-access"
```
This should make sandboxing easier to reason about. While we have
dropped support for `-s`, the way it works now is:
- no flags => `read-only`
- `--full-auto` => `workspace-write`
- currently, there is no way to specify `danger-full-access` via a CLI
flag, but we will revisit that as part of
https://github.com/openai/codex/issues/1254
Outstanding issue:
- As noted in the `TODO` on `SandboxPolicy::is_unrestricted()`, we are
still conflating sandbox preferences with approval preferences in that
case, which needs to be cleaned up.
2025-06-24 16:59:47 -07:00
|
|
|
|
|
|
|
|
let sandbox_workspace_write = r#"
|
feat: add support for --sandbox flag (#1476)
On a high-level, we try to design `config.toml` so that you don't have
to "comment out a lot of stuff" when testing different options.
Previously, defining a sandbox policy was somewhat at odds with this
principle because you would define the policy as attributes of
`[sandbox]` like so:
```toml
[sandbox]
mode = "workspace-write"
writable_roots = [ "/tmp" ]
```
but if you wanted to temporarily change to a read-only sandbox, you
might feel compelled to modify your file to be:
```toml
[sandbox]
mode = "read-only"
# mode = "workspace-write"
# writable_roots = [ "/tmp" ]
```
Technically, commenting out `writable_roots` would not be strictly
necessary, as `mode = "read-only"` would ignore `writable_roots`, but
it's still a reasonable thing to do to keep things tidy.
Currently, the various values for `mode` do not support that many
attributes, so this is not that hard to maintain, but one could imagine
this becoming more complex in the future.
In this PR, we change Codex CLI so that it no longer recognizes
`[sandbox]`. Instead, it introduces a top-level option, `sandbox_mode`,
and `[sandbox_workspace_write]` is used to further configure the sandbox
when `sandbox_mode = "workspace-write"` is used:
```toml
sandbox_mode = "workspace-write"
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
```
This feels a bit more future-proof in that it is less tedious to
configure different sandboxes:
```toml
sandbox_mode = "workspace-write"
[sandbox_read_only]
# read-only options here...
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
[sandbox_danger_full_access]
# danger-full-access options here...
```
In this scheme, you never need to comment out the configuration for an
individual sandbox type: you only need to redefine `sandbox_mode`.
Relatedly, previous to this change, a user had to do `-c
sandbox.mode=read-only` to change the mode on the command line. With
this change, things are arguably a bit cleaner because the equivalent
option is `-c sandbox_mode=read-only` (and now `-c
sandbox_workspace_write=...` can be set separately).
Though more importantly, we introduce the `-s/--sandbox` option to the
CLI, which maps directly to `sandbox_mode` in `config.toml`, making
config override behavior easier to reason about. Moreover, as you can
see in the updates to the various Markdown files, it is much easier to
explain how to configure sandboxing when things like `--sandbox
read-only` can be used as an example.
Relatedly, this cleanup also made it straightforward to add support for
a `sandbox` option for Codex when used as an MCP server (see the changes
to `mcp-server/src/codex_tool_config.rs`).
Fixes https://github.com/openai/codex/issues/1248.
2025-07-07 22:31:30 -07:00
|
|
|
sandbox_mode = "workspace-write"
|
|
|
|
|
|
|
|
|
|
[sandbox_workspace_write]
|
feat: redesign sandbox config (#1373)
This is a major redesign of how sandbox configuration works and aims to
fix https://github.com/openai/codex/issues/1248. Specifically, it
replaces `sandbox_permissions` in `config.toml` (and the
`-s`/`--sandbox-permission` CLI flags) with a "table" with effectively
three variants:
```toml
# Safest option: full disk is read-only, but writes and network access are disallowed.
[sandbox]
mode = "read-only"
# The cwd of the Codex task is writable, as well as $TMPDIR on macOS.
# writable_roots can be used to specify additional writable folders.
[sandbox]
mode = "workspace-write"
writable_roots = [] # Optional, defaults to the empty list.
network_access = false # Optional, defaults to false.
# Disable sandboxing: use at your own risk!!!
[sandbox]
mode = "danger-full-access"
```
This should make sandboxing easier to reason about. While we have
dropped support for `-s`, the way it works now is:
- no flags => `read-only`
- `--full-auto` => `workspace-write`
- currently, there is no way to specify `danger-full-access` via a CLI
flag, but we will revisit that as part of
https://github.com/openai/codex/issues/1254
Outstanding issue:
- As noted in the `TODO` on `SandboxPolicy::is_unrestricted()`, we are
still conflating sandbox preferences with approval preferences in that
case, which needs to be cleaned up.
2025-06-24 16:59:47 -07:00
|
|
|
writable_roots = [
|
2025-08-07 00:17:00 -07:00
|
|
|
"/my/workspace",
|
feat: redesign sandbox config (#1373)
This is a major redesign of how sandbox configuration works and aims to
fix https://github.com/openai/codex/issues/1248. Specifically, it
replaces `sandbox_permissions` in `config.toml` (and the
`-s`/`--sandbox-permission` CLI flags) with a "table" with effectively
three variants:
```toml
# Safest option: full disk is read-only, but writes and network access are disallowed.
[sandbox]
mode = "read-only"
# The cwd of the Codex task is writable, as well as $TMPDIR on macOS.
# writable_roots can be used to specify additional writable folders.
[sandbox]
mode = "workspace-write"
writable_roots = [] # Optional, defaults to the empty list.
network_access = false # Optional, defaults to false.
# Disable sandboxing: use at your own risk!!!
[sandbox]
mode = "danger-full-access"
```
This should make sandboxing easier to reason about. While we have
dropped support for `-s`, the way it works now is:
- no flags => `read-only`
- `--full-auto` => `workspace-write`
- currently, there is no way to specify `danger-full-access` via a CLI
flag, but we will revisit that as part of
https://github.com/openai/codex/issues/1254
Outstanding issue:
- As noted in the `TODO` on `SandboxPolicy::is_unrestricted()`, we are
still conflating sandbox preferences with approval preferences in that
case, which needs to be cleaned up.
2025-06-24 16:59:47 -07:00
|
|
|
]
|
2025-08-07 00:17:00 -07:00
|
|
|
exclude_tmpdir_env_var = true
|
|
|
|
|
exclude_slash_tmp = true
|
2025-10-16 11:23:38 -07:00
|
|
|
|
|
|
|
|
[projects."/tmp/test"]
|
|
|
|
|
trust_level = "trusted"
|
feat: redesign sandbox config (#1373)
This is a major redesign of how sandbox configuration works and aims to
fix https://github.com/openai/codex/issues/1248. Specifically, it
replaces `sandbox_permissions` in `config.toml` (and the
`-s`/`--sandbox-permission` CLI flags) with a "table" with effectively
three variants:
```toml
# Safest option: full disk is read-only, but writes and network access are disallowed.
[sandbox]
mode = "read-only"
# The cwd of the Codex task is writable, as well as $TMPDIR on macOS.
# writable_roots can be used to specify additional writable folders.
[sandbox]
mode = "workspace-write"
writable_roots = [] # Optional, defaults to the empty list.
network_access = false # Optional, defaults to false.
# Disable sandboxing: use at your own risk!!!
[sandbox]
mode = "danger-full-access"
```
This should make sandboxing easier to reason about. While we have
dropped support for `-s`, the way it works now is:
- no flags => `read-only`
- `--full-auto` => `workspace-write`
- currently, there is no way to specify `danger-full-access` via a CLI
flag, but we will revisit that as part of
https://github.com/openai/codex/issues/1254
Outstanding issue:
- As noted in the `TODO` on `SandboxPolicy::is_unrestricted()`, we are
still conflating sandbox preferences with approval preferences in that
case, which needs to be cleaned up.
2025-06-24 16:59:47 -07:00
|
|
|
"#;
|
2025-04-29 18:42:52 -07:00
|
|
|
|
feat: redesign sandbox config (#1373)
This is a major redesign of how sandbox configuration works and aims to
fix https://github.com/openai/codex/issues/1248. Specifically, it
replaces `sandbox_permissions` in `config.toml` (and the
`-s`/`--sandbox-permission` CLI flags) with a "table" with effectively
three variants:
```toml
# Safest option: full disk is read-only, but writes and network access are disallowed.
[sandbox]
mode = "read-only"
# The cwd of the Codex task is writable, as well as $TMPDIR on macOS.
# writable_roots can be used to specify additional writable folders.
[sandbox]
mode = "workspace-write"
writable_roots = [] # Optional, defaults to the empty list.
network_access = false # Optional, defaults to false.
# Disable sandboxing: use at your own risk!!!
[sandbox]
mode = "danger-full-access"
```
This should make sandboxing easier to reason about. While we have
dropped support for `-s`, the way it works now is:
- no flags => `read-only`
- `--full-auto` => `workspace-write`
- currently, there is no way to specify `danger-full-access` via a CLI
flag, but we will revisit that as part of
https://github.com/openai/codex/issues/1254
Outstanding issue:
- As noted in the `TODO` on `SandboxPolicy::is_unrestricted()`, we are
still conflating sandbox preferences with approval preferences in that
case, which needs to be cleaned up.
2025-06-24 16:59:47 -07:00
|
|
|
let sandbox_workspace_write_cfg = toml::from_str::<ConfigToml>(sandbox_workspace_write)
|
|
|
|
|
.expect("TOML deserialization should succeed");
|
feat: add support for --sandbox flag (#1476)
On a high-level, we try to design `config.toml` so that you don't have
to "comment out a lot of stuff" when testing different options.
Previously, defining a sandbox policy was somewhat at odds with this
principle because you would define the policy as attributes of
`[sandbox]` like so:
```toml
[sandbox]
mode = "workspace-write"
writable_roots = [ "/tmp" ]
```
but if you wanted to temporarily change to a read-only sandbox, you
might feel compelled to modify your file to be:
```toml
[sandbox]
mode = "read-only"
# mode = "workspace-write"
# writable_roots = [ "/tmp" ]
```
Technically, commenting out `writable_roots` would not be strictly
necessary, as `mode = "read-only"` would ignore `writable_roots`, but
it's still a reasonable thing to do to keep things tidy.
Currently, the various values for `mode` do not support that many
attributes, so this is not that hard to maintain, but one could imagine
this becoming more complex in the future.
In this PR, we change Codex CLI so that it no longer recognizes
`[sandbox]`. Instead, it introduces a top-level option, `sandbox_mode`,
and `[sandbox_workspace_write]` is used to further configure the sandbox
when `sandbox_mode = "workspace-write"` is used:
```toml
sandbox_mode = "workspace-write"
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
```
This feels a bit more future-proof in that it is less tedious to
configure different sandboxes:
```toml
sandbox_mode = "workspace-write"
[sandbox_read_only]
# read-only options here...
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
[sandbox_danger_full_access]
# danger-full-access options here...
```
In this scheme, you never need to comment out the configuration for an
individual sandbox type: you only need to redefine `sandbox_mode`.
Relatedly, previous to this change, a user had to do `-c
sandbox.mode=read-only` to change the mode on the command line. With
this change, things are arguably a bit cleaner because the equivalent
option is `-c sandbox_mode=read-only` (and now `-c
sandbox_workspace_write=...` can be set separately).
Though more importantly, we introduce the `-s/--sandbox` option to the
CLI, which maps directly to `sandbox_mode` in `config.toml`, making
config override behavior easier to reason about. Moreover, as you can
see in the updates to the various Markdown files, it is much easier to
explain how to configure sandboxing when things like `--sandbox
read-only` can be used as an example.
Relatedly, this cleanup also made it straightforward to add support for
a `sandbox` option for Codex when used as an MCP server (see the changes
to `mcp-server/src/codex_tool_config.rs`).
Fixes https://github.com/openai/codex/issues/1248.
2025-07-07 22:31:30 -07:00
|
|
|
let sandbox_mode_override = None;
|
2025-10-27 18:19:32 -07:00
|
|
|
let resolution = sandbox_workspace_write_cfg.derive_sandbox_policy(
|
|
|
|
|
sandbox_mode_override,
|
|
|
|
|
None,
|
|
|
|
|
&PathBuf::from("/tmp/test"),
|
feat: redesign sandbox config (#1373)
This is a major redesign of how sandbox configuration works and aims to
fix https://github.com/openai/codex/issues/1248. Specifically, it
replaces `sandbox_permissions` in `config.toml` (and the
`-s`/`--sandbox-permission` CLI flags) with a "table" with effectively
three variants:
```toml
# Safest option: full disk is read-only, but writes and network access are disallowed.
[sandbox]
mode = "read-only"
# The cwd of the Codex task is writable, as well as $TMPDIR on macOS.
# writable_roots can be used to specify additional writable folders.
[sandbox]
mode = "workspace-write"
writable_roots = [] # Optional, defaults to the empty list.
network_access = false # Optional, defaults to false.
# Disable sandboxing: use at your own risk!!!
[sandbox]
mode = "danger-full-access"
```
This should make sandboxing easier to reason about. While we have
dropped support for `-s`, the way it works now is:
- no flags => `read-only`
- `--full-auto` => `workspace-write`
- currently, there is no way to specify `danger-full-access` via a CLI
flag, but we will revisit that as part of
https://github.com/openai/codex/issues/1254
Outstanding issue:
- As noted in the `TODO` on `SandboxPolicy::is_unrestricted()`, we are
still conflating sandbox preferences with approval preferences in that
case, which needs to be cleaned up.
2025-06-24 16:59:47 -07:00
|
|
|
);
|
2025-10-27 18:19:32 -07:00
|
|
|
if cfg!(target_os = "windows") {
|
|
|
|
|
assert_eq!(
|
|
|
|
|
resolution,
|
|
|
|
|
SandboxPolicyResolution {
|
|
|
|
|
policy: SandboxPolicy::ReadOnly,
|
|
|
|
|
forced_auto_mode_downgraded_on_windows: true,
|
|
|
|
|
}
|
|
|
|
|
);
|
|
|
|
|
} else {
|
|
|
|
|
assert_eq!(
|
|
|
|
|
resolution,
|
|
|
|
|
SandboxPolicyResolution {
|
|
|
|
|
policy: SandboxPolicy::WorkspaceWrite {
|
|
|
|
|
writable_roots: vec![PathBuf::from("/my/workspace")],
|
|
|
|
|
network_access: false,
|
|
|
|
|
exclude_tmpdir_env_var: true,
|
|
|
|
|
exclude_slash_tmp: true,
|
|
|
|
|
},
|
|
|
|
|
forced_auto_mode_downgraded_on_windows: false,
|
|
|
|
|
}
|
|
|
|
|
);
|
|
|
|
|
}
|
2025-04-29 18:42:52 -07:00
|
|
|
}
|
2025-09-14 21:30:56 -07:00
|
|
|
|
2025-10-18 22:13:53 -07:00
|
|
|
#[test]
|
|
|
|
|
fn add_dir_override_extends_workspace_writable_roots() -> std::io::Result<()> {
|
|
|
|
|
let temp_dir = TempDir::new()?;
|
|
|
|
|
let frontend = temp_dir.path().join("frontend");
|
|
|
|
|
let backend = temp_dir.path().join("backend");
|
|
|
|
|
std::fs::create_dir_all(&frontend)?;
|
|
|
|
|
std::fs::create_dir_all(&backend)?;
|
|
|
|
|
|
|
|
|
|
let overrides = ConfigOverrides {
|
|
|
|
|
cwd: Some(frontend),
|
|
|
|
|
sandbox_mode: Some(SandboxMode::WorkspaceWrite),
|
|
|
|
|
additional_writable_roots: vec![PathBuf::from("../backend"), backend.clone()],
|
|
|
|
|
..Default::default()
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
let config = Config::load_from_base_config_with_overrides(
|
|
|
|
|
ConfigToml::default(),
|
|
|
|
|
overrides,
|
|
|
|
|
temp_dir.path().to_path_buf(),
|
|
|
|
|
)?;
|
|
|
|
|
|
|
|
|
|
let expected_backend = canonicalize(&backend).expect("canonicalize backend directory");
|
2025-10-27 18:19:32 -07:00
|
|
|
if cfg!(target_os = "windows") {
|
|
|
|
|
assert!(
|
|
|
|
|
config.forced_auto_mode_downgraded_on_windows,
|
|
|
|
|
"expected workspace-write request to be downgraded on Windows"
|
|
|
|
|
);
|
|
|
|
|
match config.sandbox_policy {
|
|
|
|
|
SandboxPolicy::ReadOnly => {}
|
|
|
|
|
other => panic!("expected read-only policy on Windows, got {other:?}"),
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
match config.sandbox_policy {
|
|
|
|
|
SandboxPolicy::WorkspaceWrite { writable_roots, .. } => {
|
|
|
|
|
assert_eq!(
|
|
|
|
|
writable_roots
|
|
|
|
|
.iter()
|
|
|
|
|
.filter(|root| **root == expected_backend)
|
|
|
|
|
.count(),
|
|
|
|
|
1,
|
|
|
|
|
"expected single writable root entry for {}",
|
|
|
|
|
expected_backend.display()
|
|
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
other => panic!("expected workspace-write policy, got {other:?}"),
|
2025-10-18 22:13:53 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
2025-10-27 19:41:49 -07:00
|
|
|
/// With no explicit setting, the CLI auth credentials store defaults to `File`.
#[test]
fn config_defaults_to_file_cli_auth_store_mode() -> std::io::Result<()> {
    let codex_home = TempDir::new()?;

    let loaded = Config::load_from_base_config_with_overrides(
        ConfigToml::default(),
        ConfigOverrides::default(),
        codex_home.path().to_path_buf(),
    )?;

    assert_eq!(
        loaded.cli_auth_credentials_store_mode,
        AuthCredentialsStoreMode::File,
    );

    Ok(())
}
|
|
|
|
|
|
|
|
|
|
/// An explicit `cli_auth_credentials_store = "keyring"` setting is honored.
#[test]
fn config_honors_explicit_keyring_auth_store_mode() -> std::io::Result<()> {
    let codex_home = TempDir::new()?;
    let base = ConfigToml {
        cli_auth_credentials_store: Some(AuthCredentialsStoreMode::Keyring),
        ..Default::default()
    };

    let loaded = Config::load_from_base_config_with_overrides(
        base,
        ConfigOverrides::default(),
        codex_home.path().to_path_buf(),
    )?;

    assert_eq!(
        loaded.cli_auth_credentials_store_mode,
        AuthCredentialsStoreMode::Keyring,
    );

    Ok(())
}
|
|
|
|
|
|
2025-10-07 19:39:32 -07:00
|
|
|
/// With no explicit setting, the MCP OAuth credentials store defaults to `Auto`.
#[test]
fn config_defaults_to_auto_oauth_store_mode() -> std::io::Result<()> {
    let codex_home = TempDir::new()?;

    let loaded = Config::load_from_base_config_with_overrides(
        ConfigToml::default(),
        ConfigOverrides::default(),
        codex_home.path().to_path_buf(),
    )?;

    assert_eq!(
        loaded.mcp_oauth_credentials_store_mode,
        OAuthCredentialsStoreMode::Auto,
    );

    Ok(())
}
|
|
|
|
|
|
2025-10-14 18:50:00 +01:00
|
|
|
/// A legacy toggle set in the active profile (`include_view_image_tool = false`)
/// overrides the base config, and is reflected both in the resolved feature set
/// and in the legacy boolean field.
#[test]
fn profile_legacy_toggles_override_base() -> std::io::Result<()> {
    let codex_home = TempDir::new()?;
    let profiles = HashMap::from([(
        "work".to_string(),
        ConfigProfile {
            include_view_image_tool: Some(false),
            ..Default::default()
        },
    )]);
    let base = ConfigToml {
        profiles,
        profile: Some("work".to_string()),
        ..Default::default()
    };

    let config = Config::load_from_base_config_with_overrides(
        base,
        ConfigOverrides::default(),
        codex_home.path().to_path_buf(),
    )?;

    assert!(!config.features.enabled(Feature::ViewImageTool));
    assert!(!config.include_view_image_tool);

    Ok(())
}
|
|
|
|
|
|
2025-10-25 18:52:26 -05:00
|
|
|
/// `sandbox_mode` set in the active profile wins over the top-level value, and
/// counts as a user-supplied approval/sandbox customization.
#[test]
fn profile_sandbox_mode_overrides_base() -> std::io::Result<()> {
    let codex_home = TempDir::new()?;
    let profiles = HashMap::from([(
        "work".to_string(),
        ConfigProfile {
            sandbox_mode: Some(SandboxMode::DangerFullAccess),
            ..Default::default()
        },
    )]);
    let base = ConfigToml {
        profiles,
        profile: Some("work".to_string()),
        sandbox_mode: Some(SandboxMode::ReadOnly),
        ..Default::default()
    };

    let config = Config::load_from_base_config_with_overrides(
        base,
        ConfigOverrides::default(),
        codex_home.path().to_path_buf(),
    )?;

    assert!(matches!(
        config.sandbox_policy,
        SandboxPolicy::DangerFullAccess
    ));
    assert!(config.did_user_set_custom_approval_policy_or_sandbox_mode);

    Ok(())
}
|
|
|
|
|
|
|
|
|
|
/// A CLI-level sandbox override takes precedence over the profile's
/// `sandbox_mode` (modulo the Windows downgrade of workspace-write to
/// read-only).
#[test]
fn cli_override_takes_precedence_over_profile_sandbox_mode() -> std::io::Result<()> {
    let codex_home = TempDir::new()?;
    let profiles = HashMap::from([(
        "work".to_string(),
        ConfigProfile {
            sandbox_mode: Some(SandboxMode::DangerFullAccess),
            ..Default::default()
        },
    )]);
    let base = ConfigToml {
        profiles,
        profile: Some("work".to_string()),
        ..Default::default()
    };

    let overrides = ConfigOverrides {
        sandbox_mode: Some(SandboxMode::WorkspaceWrite),
        ..Default::default()
    };

    let config = Config::load_from_base_config_with_overrides(
        base,
        overrides,
        codex_home.path().to_path_buf(),
    )?;

    if cfg!(target_os = "windows") {
        assert!(matches!(config.sandbox_policy, SandboxPolicy::ReadOnly));
        assert!(config.forced_auto_mode_downgraded_on_windows);
    } else {
        assert!(matches!(
            config.sandbox_policy,
            SandboxPolicy::WorkspaceWrite { .. }
        ));
        assert!(!config.forced_auto_mode_downgraded_on_windows);
    }

    Ok(())
}
|
|
|
|
|
|
2025-10-14 18:50:00 +01:00
|
|
|
/// An entry in the `[features]` table takes precedence over the legacy boolean
/// flag it replaces: disabling `apply_patch_freeform` clears both views.
#[test]
fn feature_table_overrides_legacy_flags() -> std::io::Result<()> {
    let codex_home = TempDir::new()?;
    let entries = BTreeMap::from([("apply_patch_freeform".to_string(), false)]);
    let base = ConfigToml {
        features: Some(crate::features::FeaturesToml { entries }),
        ..Default::default()
    };

    let config = Config::load_from_base_config_with_overrides(
        base,
        ConfigOverrides::default(),
        codex_home.path().to_path_buf(),
    )?;

    assert!(!config.features.enabled(Feature::ApplyPatchFreeform));
    assert!(!config.include_apply_patch_tool);

    Ok(())
}
|
|
|
|
|
|
|
|
|
|
/// Each legacy `experimental_*` toggle maps onto its corresponding feature and
/// keeps the matching legacy boolean accessor in sync.
#[test]
fn legacy_toggles_map_to_features() -> std::io::Result<()> {
    let codex_home = TempDir::new()?;
    let base = ConfigToml {
        experimental_use_exec_command_tool: Some(true),
        experimental_use_unified_exec_tool: Some(true),
        experimental_use_rmcp_client: Some(true),
        experimental_use_freeform_apply_patch: Some(true),
        ..Default::default()
    };

    let config = Config::load_from_base_config_with_overrides(
        base,
        ConfigOverrides::default(),
        codex_home.path().to_path_buf(),
    )?;

    // The resolved feature set reflects every toggle...
    assert!(config.features.enabled(Feature::ApplyPatchFreeform));
    assert!(config.features.enabled(Feature::StreamableShell));
    assert!(config.features.enabled(Feature::UnifiedExec));
    assert!(config.features.enabled(Feature::RmcpClient));

    // ...and so do the legacy boolean fields derived from it.
    assert!(config.include_apply_patch_tool);
    assert!(config.use_experimental_streamable_shell_tool);
    assert!(config.use_experimental_unified_exec_tool);
    assert!(config.use_experimental_use_rmcp_client);

    Ok(())
}
|
|
|
|
|
|
2025-10-07 19:39:32 -07:00
|
|
|
/// An explicit `mcp_oauth_credentials_store = "file"` setting is honored.
#[test]
fn config_honors_explicit_file_oauth_store_mode() -> std::io::Result<()> {
    let codex_home = TempDir::new()?;
    let base = ConfigToml {
        mcp_oauth_credentials_store: Some(OAuthCredentialsStoreMode::File),
        ..Default::default()
    };

    let loaded = Config::load_from_base_config_with_overrides(
        base,
        ConfigOverrides::default(),
        codex_home.path().to_path_buf(),
    )?;

    assert_eq!(
        loaded.mcp_oauth_credentials_store_mode,
        OAuthCredentialsStoreMode::File,
    );

    Ok(())
}
|
|
|
|
|
|
|
|
|
|
#[tokio::test]
|
|
|
|
|
async fn managed_config_overrides_oauth_store_mode() -> anyhow::Result<()> {
|
|
|
|
|
let codex_home = TempDir::new()?;
|
|
|
|
|
let managed_path = codex_home.path().join("managed_config.toml");
|
|
|
|
|
let config_path = codex_home.path().join(CONFIG_TOML_FILE);
|
|
|
|
|
|
|
|
|
|
std::fs::write(&config_path, "mcp_oauth_credentials_store = \"file\"\n")?;
|
|
|
|
|
std::fs::write(&managed_path, "mcp_oauth_credentials_store = \"keyring\"\n")?;
|
|
|
|
|
|
|
|
|
|
let overrides = crate::config_loader::LoaderOverrides {
|
|
|
|
|
managed_config_path: Some(managed_path.clone()),
|
|
|
|
|
#[cfg(target_os = "macos")]
|
|
|
|
|
managed_preferences_base64: None,
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
let root_value = load_resolved_config(codex_home.path(), Vec::new(), overrides).await?;
|
|
|
|
|
let cfg: ConfigToml = root_value.try_into().map_err(|e| {
|
|
|
|
|
tracing::error!("Failed to deserialize overridden config: {e}");
|
|
|
|
|
std::io::Error::new(std::io::ErrorKind::InvalidData, e)
|
|
|
|
|
})?;
|
|
|
|
|
assert_eq!(
|
|
|
|
|
cfg.mcp_oauth_credentials_store,
|
|
|
|
|
Some(OAuthCredentialsStoreMode::Keyring),
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
let final_config = Config::load_from_base_config_with_overrides(
|
|
|
|
|
cfg,
|
|
|
|
|
ConfigOverrides::default(),
|
|
|
|
|
codex_home.path().to_path_buf(),
|
|
|
|
|
)?;
|
|
|
|
|
assert_eq!(
|
|
|
|
|
final_config.mcp_oauth_credentials_store_mode,
|
|
|
|
|
OAuthCredentialsStoreMode::Keyring,
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
2025-10-03 13:02:26 -07:00
|
|
|
#[tokio::test]
|
|
|
|
|
async fn load_global_mcp_servers_returns_empty_if_missing() -> anyhow::Result<()> {
|
2025-09-14 21:30:56 -07:00
|
|
|
let codex_home = TempDir::new()?;
|
|
|
|
|
|
2025-10-03 13:02:26 -07:00
|
|
|
let servers = load_global_mcp_servers(codex_home.path()).await?;
|
2025-09-14 21:30:56 -07:00
|
|
|
assert!(servers.is_empty());
|
|
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
2025-10-03 13:02:26 -07:00
|
|
|
#[tokio::test]
|
2025-10-29 20:52:46 +00:00
|
|
|
async fn replace_mcp_servers_round_trips_entries() -> anyhow::Result<()> {
|
2025-09-14 21:30:56 -07:00
|
|
|
let codex_home = TempDir::new()?;
|
|
|
|
|
|
|
|
|
|
let mut servers = BTreeMap::new();
|
|
|
|
|
servers.insert(
|
|
|
|
|
"docs".to_string(),
|
|
|
|
|
McpServerConfig {
|
2025-09-26 18:24:01 -07:00
|
|
|
transport: McpServerTransportConfig::Stdio {
|
|
|
|
|
command: "echo".to_string(),
|
|
|
|
|
args: vec!["hello".to_string()],
|
|
|
|
|
env: None,
|
2025-10-16 21:24:43 -07:00
|
|
|
env_vars: Vec::new(),
|
|
|
|
|
cwd: None,
|
2025-09-26 18:24:01 -07:00
|
|
|
},
|
2025-10-08 13:24:51 -07:00
|
|
|
enabled: true,
|
2025-09-22 10:30:59 -07:00
|
|
|
startup_timeout_sec: Some(Duration::from_secs(3)),
|
|
|
|
|
tool_timeout_sec: Some(Duration::from_secs(5)),
|
2025-10-20 15:35:36 -07:00
|
|
|
enabled_tools: None,
|
|
|
|
|
disabled_tools: None,
|
2025-09-14 21:30:56 -07:00
|
|
|
},
|
|
|
|
|
);
|
|
|
|
|
|
2025-10-29 20:52:46 +00:00
|
|
|
apply_blocking(
|
|
|
|
|
codex_home.path(),
|
|
|
|
|
None,
|
|
|
|
|
&[ConfigEdit::ReplaceMcpServers(servers.clone())],
|
|
|
|
|
)?;
|
2025-09-14 21:30:56 -07:00
|
|
|
|
2025-10-03 13:02:26 -07:00
|
|
|
let loaded = load_global_mcp_servers(codex_home.path()).await?;
|
2025-09-14 21:30:56 -07:00
|
|
|
assert_eq!(loaded.len(), 1);
|
|
|
|
|
let docs = loaded.get("docs").expect("docs entry");
|
2025-09-26 18:24:01 -07:00
|
|
|
match &docs.transport {
|
2025-10-16 21:24:43 -07:00
|
|
|
McpServerTransportConfig::Stdio {
|
|
|
|
|
command,
|
|
|
|
|
args,
|
|
|
|
|
env,
|
|
|
|
|
env_vars,
|
|
|
|
|
cwd,
|
|
|
|
|
} => {
|
2025-09-26 18:24:01 -07:00
|
|
|
assert_eq!(command, "echo");
|
|
|
|
|
assert_eq!(args, &vec!["hello".to_string()]);
|
|
|
|
|
assert!(env.is_none());
|
2025-10-16 21:24:43 -07:00
|
|
|
assert!(env_vars.is_empty());
|
|
|
|
|
assert!(cwd.is_none());
|
2025-09-26 18:24:01 -07:00
|
|
|
}
|
|
|
|
|
other => panic!("unexpected transport {other:?}"),
|
|
|
|
|
}
|
2025-09-22 10:30:59 -07:00
|
|
|
assert_eq!(docs.startup_timeout_sec, Some(Duration::from_secs(3)));
|
|
|
|
|
assert_eq!(docs.tool_timeout_sec, Some(Duration::from_secs(5)));
|
2025-10-08 13:24:51 -07:00
|
|
|
assert!(docs.enabled);
|
2025-09-14 21:30:56 -07:00
|
|
|
|
|
|
|
|
let empty = BTreeMap::new();
|
2025-10-29 20:52:46 +00:00
|
|
|
apply_blocking(
|
|
|
|
|
codex_home.path(),
|
|
|
|
|
None,
|
|
|
|
|
&[ConfigEdit::ReplaceMcpServers(empty.clone())],
|
|
|
|
|
)?;
|
2025-10-03 13:02:26 -07:00
|
|
|
let loaded = load_global_mcp_servers(codex_home.path()).await?;
|
2025-09-14 21:30:56 -07:00
|
|
|
assert!(loaded.is_empty());
|
|
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
2025-05-13 16:52:52 -07:00
|
|
|
|
2025-10-03 13:02:26 -07:00
|
|
|
#[tokio::test]
|
|
|
|
|
async fn managed_config_wins_over_cli_overrides() -> anyhow::Result<()> {
|
|
|
|
|
let codex_home = TempDir::new()?;
|
|
|
|
|
let managed_path = codex_home.path().join("managed_config.toml");
|
|
|
|
|
|
|
|
|
|
std::fs::write(
|
|
|
|
|
codex_home.path().join(CONFIG_TOML_FILE),
|
|
|
|
|
"model = \"base\"\n",
|
|
|
|
|
)?;
|
|
|
|
|
std::fs::write(&managed_path, "model = \"managed_config\"\n")?;
|
|
|
|
|
|
|
|
|
|
let overrides = crate::config_loader::LoaderOverrides {
|
|
|
|
|
managed_config_path: Some(managed_path),
|
|
|
|
|
#[cfg(target_os = "macos")]
|
|
|
|
|
managed_preferences_base64: None,
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
let root_value = load_resolved_config(
|
|
|
|
|
codex_home.path(),
|
|
|
|
|
vec![("model".to_string(), TomlValue::String("cli".to_string()))],
|
|
|
|
|
overrides,
|
|
|
|
|
)
|
|
|
|
|
.await?;
|
|
|
|
|
|
|
|
|
|
let cfg: ConfigToml = root_value.try_into().map_err(|e| {
|
|
|
|
|
tracing::error!("Failed to deserialize overridden config: {e}");
|
|
|
|
|
std::io::Error::new(std::io::ErrorKind::InvalidData, e)
|
|
|
|
|
})?;
|
|
|
|
|
|
|
|
|
|
assert_eq!(cfg.model.as_deref(), Some("managed_config"));
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[tokio::test]
|
|
|
|
|
async fn load_global_mcp_servers_accepts_legacy_ms_field() -> anyhow::Result<()> {
|
2025-09-22 10:30:59 -07:00
|
|
|
let codex_home = TempDir::new()?;
|
|
|
|
|
let config_path = codex_home.path().join(CONFIG_TOML_FILE);
|
|
|
|
|
|
|
|
|
|
std::fs::write(
|
|
|
|
|
&config_path,
|
|
|
|
|
r#"
|
|
|
|
|
[mcp_servers]
|
|
|
|
|
[mcp_servers.docs]
|
|
|
|
|
command = "echo"
|
|
|
|
|
startup_timeout_ms = 2500
|
|
|
|
|
"#,
|
|
|
|
|
)?;
|
|
|
|
|
|
2025-10-03 13:02:26 -07:00
|
|
|
let servers = load_global_mcp_servers(codex_home.path()).await?;
|
2025-09-22 10:30:59 -07:00
|
|
|
let docs = servers.get("docs").expect("docs entry");
|
|
|
|
|
assert_eq!(docs.startup_timeout_sec, Some(Duration::from_millis(2500)));
|
|
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
2025-10-07 20:21:37 -07:00
|
|
|
#[tokio::test]
|
|
|
|
|
async fn load_global_mcp_servers_rejects_inline_bearer_token() -> anyhow::Result<()> {
|
|
|
|
|
let codex_home = TempDir::new()?;
|
|
|
|
|
let config_path = codex_home.path().join(CONFIG_TOML_FILE);
|
|
|
|
|
|
|
|
|
|
std::fs::write(
|
|
|
|
|
&config_path,
|
|
|
|
|
r#"
|
|
|
|
|
[mcp_servers.docs]
|
|
|
|
|
url = "https://example.com/mcp"
|
|
|
|
|
bearer_token = "secret"
|
|
|
|
|
"#,
|
|
|
|
|
)?;
|
|
|
|
|
|
|
|
|
|
let err = load_global_mcp_servers(codex_home.path())
|
|
|
|
|
.await
|
|
|
|
|
.expect_err("bearer_token entries should be rejected");
|
|
|
|
|
|
|
|
|
|
assert_eq!(err.kind(), std::io::ErrorKind::InvalidData);
|
|
|
|
|
assert!(err.to_string().contains("bearer_token"));
|
|
|
|
|
assert!(err.to_string().contains("bearer_token_env_var"));
|
|
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
2025-10-03 13:02:26 -07:00
|
|
|
#[tokio::test]
|
2025-10-29 20:52:46 +00:00
|
|
|
async fn replace_mcp_servers_serializes_env_sorted() -> anyhow::Result<()> {
|
2025-09-26 18:24:01 -07:00
|
|
|
let codex_home = TempDir::new()?;
|
|
|
|
|
|
|
|
|
|
let servers = BTreeMap::from([(
|
|
|
|
|
"docs".to_string(),
|
|
|
|
|
McpServerConfig {
|
|
|
|
|
transport: McpServerTransportConfig::Stdio {
|
|
|
|
|
command: "docs-server".to_string(),
|
|
|
|
|
args: vec!["--verbose".to_string()],
|
|
|
|
|
env: Some(HashMap::from([
|
|
|
|
|
("ZIG_VAR".to_string(), "3".to_string()),
|
|
|
|
|
("ALPHA_VAR".to_string(), "1".to_string()),
|
|
|
|
|
])),
|
2025-10-16 21:24:43 -07:00
|
|
|
env_vars: Vec::new(),
|
|
|
|
|
cwd: None,
|
2025-09-26 18:24:01 -07:00
|
|
|
},
|
2025-10-08 13:24:51 -07:00
|
|
|
enabled: true,
|
2025-09-26 18:24:01 -07:00
|
|
|
startup_timeout_sec: None,
|
|
|
|
|
tool_timeout_sec: None,
|
2025-10-20 15:35:36 -07:00
|
|
|
enabled_tools: None,
|
|
|
|
|
disabled_tools: None,
|
2025-09-26 18:24:01 -07:00
|
|
|
},
|
|
|
|
|
)]);
|
|
|
|
|
|
2025-10-29 20:52:46 +00:00
|
|
|
apply_blocking(
|
|
|
|
|
codex_home.path(),
|
|
|
|
|
None,
|
|
|
|
|
&[ConfigEdit::ReplaceMcpServers(servers.clone())],
|
|
|
|
|
)?;
|
2025-09-26 18:24:01 -07:00
|
|
|
|
|
|
|
|
let config_path = codex_home.path().join(CONFIG_TOML_FILE);
|
|
|
|
|
let serialized = std::fs::read_to_string(&config_path)?;
|
|
|
|
|
assert_eq!(
|
|
|
|
|
serialized,
|
|
|
|
|
r#"[mcp_servers.docs]
|
|
|
|
|
command = "docs-server"
|
|
|
|
|
args = ["--verbose"]
|
|
|
|
|
|
|
|
|
|
[mcp_servers.docs.env]
|
|
|
|
|
ALPHA_VAR = "1"
|
|
|
|
|
ZIG_VAR = "3"
|
|
|
|
|
"#
|
|
|
|
|
);
|
|
|
|
|
|
2025-10-03 13:02:26 -07:00
|
|
|
let loaded = load_global_mcp_servers(codex_home.path()).await?;
|
2025-09-26 18:24:01 -07:00
|
|
|
let docs = loaded.get("docs").expect("docs entry");
|
|
|
|
|
match &docs.transport {
|
2025-10-16 21:24:43 -07:00
|
|
|
McpServerTransportConfig::Stdio {
|
|
|
|
|
command,
|
|
|
|
|
args,
|
|
|
|
|
env,
|
|
|
|
|
env_vars,
|
|
|
|
|
cwd,
|
|
|
|
|
} => {
|
2025-09-26 18:24:01 -07:00
|
|
|
assert_eq!(command, "docs-server");
|
|
|
|
|
assert_eq!(args, &vec!["--verbose".to_string()]);
|
|
|
|
|
let env = env
|
|
|
|
|
.as_ref()
|
|
|
|
|
.expect("env should be preserved for stdio transport");
|
|
|
|
|
assert_eq!(env.get("ALPHA_VAR"), Some(&"1".to_string()));
|
|
|
|
|
assert_eq!(env.get("ZIG_VAR"), Some(&"3".to_string()));
|
2025-10-16 21:24:43 -07:00
|
|
|
assert!(env_vars.is_empty());
|
|
|
|
|
assert!(cwd.is_none());
|
|
|
|
|
}
|
|
|
|
|
other => panic!("unexpected transport {other:?}"),
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[tokio::test]
|
2025-10-29 20:52:46 +00:00
|
|
|
async fn replace_mcp_servers_serializes_env_vars() -> anyhow::Result<()> {
|
2025-10-16 21:24:43 -07:00
|
|
|
let codex_home = TempDir::new()?;
|
|
|
|
|
|
|
|
|
|
let servers = BTreeMap::from([(
|
|
|
|
|
"docs".to_string(),
|
|
|
|
|
McpServerConfig {
|
|
|
|
|
transport: McpServerTransportConfig::Stdio {
|
|
|
|
|
command: "docs-server".to_string(),
|
|
|
|
|
args: Vec::new(),
|
|
|
|
|
env: None,
|
|
|
|
|
env_vars: vec!["ALPHA".to_string(), "BETA".to_string()],
|
|
|
|
|
cwd: None,
|
|
|
|
|
},
|
|
|
|
|
enabled: true,
|
|
|
|
|
startup_timeout_sec: None,
|
|
|
|
|
tool_timeout_sec: None,
|
2025-10-20 15:35:36 -07:00
|
|
|
enabled_tools: None,
|
|
|
|
|
disabled_tools: None,
|
2025-10-16 21:24:43 -07:00
|
|
|
},
|
|
|
|
|
)]);
|
|
|
|
|
|
2025-10-29 20:52:46 +00:00
|
|
|
apply_blocking(
|
|
|
|
|
codex_home.path(),
|
|
|
|
|
None,
|
|
|
|
|
&[ConfigEdit::ReplaceMcpServers(servers.clone())],
|
|
|
|
|
)?;
|
2025-10-16 21:24:43 -07:00
|
|
|
|
|
|
|
|
let config_path = codex_home.path().join(CONFIG_TOML_FILE);
|
|
|
|
|
let serialized = std::fs::read_to_string(&config_path)?;
|
|
|
|
|
assert!(
|
|
|
|
|
serialized.contains(r#"env_vars = ["ALPHA", "BETA"]"#),
|
|
|
|
|
"serialized config missing env_vars field:\n{serialized}"
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
let loaded = load_global_mcp_servers(codex_home.path()).await?;
|
|
|
|
|
let docs = loaded.get("docs").expect("docs entry");
|
|
|
|
|
match &docs.transport {
|
|
|
|
|
McpServerTransportConfig::Stdio { env_vars, .. } => {
|
|
|
|
|
assert_eq!(env_vars, &vec!["ALPHA".to_string(), "BETA".to_string()]);
|
|
|
|
|
}
|
|
|
|
|
other => panic!("unexpected transport {other:?}"),
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[tokio::test]
|
2025-10-29 20:52:46 +00:00
|
|
|
async fn replace_mcp_servers_serializes_cwd() -> anyhow::Result<()> {
|
2025-10-16 21:24:43 -07:00
|
|
|
let codex_home = TempDir::new()?;
|
|
|
|
|
|
|
|
|
|
let cwd_path = PathBuf::from("/tmp/codex-mcp");
|
|
|
|
|
let servers = BTreeMap::from([(
|
|
|
|
|
"docs".to_string(),
|
|
|
|
|
McpServerConfig {
|
|
|
|
|
transport: McpServerTransportConfig::Stdio {
|
|
|
|
|
command: "docs-server".to_string(),
|
|
|
|
|
args: Vec::new(),
|
|
|
|
|
env: None,
|
|
|
|
|
env_vars: Vec::new(),
|
|
|
|
|
cwd: Some(cwd_path.clone()),
|
|
|
|
|
},
|
|
|
|
|
enabled: true,
|
|
|
|
|
startup_timeout_sec: None,
|
|
|
|
|
tool_timeout_sec: None,
|
2025-10-20 15:35:36 -07:00
|
|
|
enabled_tools: None,
|
|
|
|
|
disabled_tools: None,
|
2025-10-16 21:24:43 -07:00
|
|
|
},
|
|
|
|
|
)]);
|
|
|
|
|
|
2025-10-29 20:52:46 +00:00
|
|
|
apply_blocking(
|
|
|
|
|
codex_home.path(),
|
|
|
|
|
None,
|
|
|
|
|
&[ConfigEdit::ReplaceMcpServers(servers.clone())],
|
|
|
|
|
)?;
|
2025-10-16 21:24:43 -07:00
|
|
|
|
|
|
|
|
let config_path = codex_home.path().join(CONFIG_TOML_FILE);
|
|
|
|
|
let serialized = std::fs::read_to_string(&config_path)?;
|
|
|
|
|
assert!(
|
|
|
|
|
serialized.contains(r#"cwd = "/tmp/codex-mcp""#),
|
|
|
|
|
"serialized config missing cwd field:\n{serialized}"
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
let loaded = load_global_mcp_servers(codex_home.path()).await?;
|
|
|
|
|
let docs = loaded.get("docs").expect("docs entry");
|
|
|
|
|
match &docs.transport {
|
|
|
|
|
McpServerTransportConfig::Stdio { cwd, .. } => {
|
|
|
|
|
assert_eq!(cwd.as_deref(), Some(Path::new("/tmp/codex-mcp")));
|
2025-09-26 18:24:01 -07:00
|
|
|
}
|
|
|
|
|
other => panic!("unexpected transport {other:?}"),
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
2025-10-03 13:02:26 -07:00
|
|
|
#[tokio::test]
|
2025-10-29 20:52:46 +00:00
|
|
|
async fn replace_mcp_servers_streamable_http_serializes_bearer_token() -> anyhow::Result<()> {
|
2025-09-26 18:24:01 -07:00
|
|
|
let codex_home = TempDir::new()?;
|
|
|
|
|
|
2025-10-16 20:15:47 -07:00
|
|
|
let servers = BTreeMap::from([(
|
2025-09-26 18:24:01 -07:00
|
|
|
"docs".to_string(),
|
|
|
|
|
McpServerConfig {
|
|
|
|
|
transport: McpServerTransportConfig::StreamableHttp {
|
|
|
|
|
url: "https://example.com/mcp".to_string(),
|
2025-10-07 20:21:37 -07:00
|
|
|
bearer_token_env_var: Some("MCP_TOKEN".to_string()),
|
2025-10-16 20:15:47 -07:00
|
|
|
http_headers: None,
|
|
|
|
|
env_http_headers: None,
|
2025-09-26 18:24:01 -07:00
|
|
|
},
|
2025-10-08 13:24:51 -07:00
|
|
|
enabled: true,
|
2025-09-26 18:24:01 -07:00
|
|
|
startup_timeout_sec: Some(Duration::from_secs(2)),
|
|
|
|
|
tool_timeout_sec: None,
|
2025-10-20 15:35:36 -07:00
|
|
|
enabled_tools: None,
|
|
|
|
|
disabled_tools: None,
|
2025-09-26 18:24:01 -07:00
|
|
|
},
|
|
|
|
|
)]);
|
|
|
|
|
|
2025-10-29 20:52:46 +00:00
|
|
|
apply_blocking(
|
|
|
|
|
codex_home.path(),
|
|
|
|
|
None,
|
|
|
|
|
&[ConfigEdit::ReplaceMcpServers(servers.clone())],
|
|
|
|
|
)?;
|
2025-09-26 18:24:01 -07:00
|
|
|
|
|
|
|
|
let config_path = codex_home.path().join(CONFIG_TOML_FILE);
|
|
|
|
|
let serialized = std::fs::read_to_string(&config_path)?;
|
|
|
|
|
assert_eq!(
|
|
|
|
|
serialized,
|
|
|
|
|
r#"[mcp_servers.docs]
|
|
|
|
|
url = "https://example.com/mcp"
|
2025-10-07 20:21:37 -07:00
|
|
|
bearer_token_env_var = "MCP_TOKEN"
|
2025-09-26 18:24:01 -07:00
|
|
|
startup_timeout_sec = 2.0
|
|
|
|
|
"#
|
|
|
|
|
);
|
|
|
|
|
|
2025-10-03 13:02:26 -07:00
|
|
|
let loaded = load_global_mcp_servers(codex_home.path()).await?;
|
2025-09-26 18:24:01 -07:00
|
|
|
let docs = loaded.get("docs").expect("docs entry");
|
|
|
|
|
match &docs.transport {
|
2025-10-07 20:21:37 -07:00
|
|
|
McpServerTransportConfig::StreamableHttp {
|
|
|
|
|
url,
|
|
|
|
|
bearer_token_env_var,
|
2025-10-16 20:15:47 -07:00
|
|
|
http_headers,
|
|
|
|
|
env_http_headers,
|
2025-10-07 20:21:37 -07:00
|
|
|
} => {
|
2025-09-26 18:24:01 -07:00
|
|
|
assert_eq!(url, "https://example.com/mcp");
|
2025-10-07 20:21:37 -07:00
|
|
|
assert_eq!(bearer_token_env_var.as_deref(), Some("MCP_TOKEN"));
|
2025-10-16 20:15:47 -07:00
|
|
|
assert!(http_headers.is_none());
|
|
|
|
|
assert!(env_http_headers.is_none());
|
2025-09-26 18:24:01 -07:00
|
|
|
}
|
|
|
|
|
other => panic!("unexpected transport {other:?}"),
|
|
|
|
|
}
|
|
|
|
|
assert_eq!(docs.startup_timeout_sec, Some(Duration::from_secs(2)));
|
|
|
|
|
|
2025-10-16 20:15:47 -07:00
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[tokio::test]
|
2025-10-29 20:52:46 +00:00
|
|
|
async fn replace_mcp_servers_streamable_http_serializes_custom_headers() -> anyhow::Result<()> {
|
2025-10-16 20:15:47 -07:00
|
|
|
let codex_home = TempDir::new()?;
|
|
|
|
|
|
|
|
|
|
let servers = BTreeMap::from([(
|
|
|
|
|
"docs".to_string(),
|
|
|
|
|
McpServerConfig {
|
|
|
|
|
transport: McpServerTransportConfig::StreamableHttp {
|
|
|
|
|
url: "https://example.com/mcp".to_string(),
|
|
|
|
|
bearer_token_env_var: Some("MCP_TOKEN".to_string()),
|
|
|
|
|
http_headers: Some(HashMap::from([("X-Doc".to_string(), "42".to_string())])),
|
|
|
|
|
env_http_headers: Some(HashMap::from([(
|
|
|
|
|
"X-Auth".to_string(),
|
|
|
|
|
"DOCS_AUTH".to_string(),
|
|
|
|
|
)])),
|
|
|
|
|
},
|
|
|
|
|
enabled: true,
|
|
|
|
|
startup_timeout_sec: Some(Duration::from_secs(2)),
|
|
|
|
|
tool_timeout_sec: None,
|
2025-10-20 15:35:36 -07:00
|
|
|
enabled_tools: None,
|
|
|
|
|
disabled_tools: None,
|
2025-10-16 20:15:47 -07:00
|
|
|
},
|
|
|
|
|
)]);
|
2025-10-29 20:52:46 +00:00
|
|
|
apply_blocking(
|
|
|
|
|
codex_home.path(),
|
|
|
|
|
None,
|
|
|
|
|
&[ConfigEdit::ReplaceMcpServers(servers.clone())],
|
|
|
|
|
)?;
|
2025-10-16 20:15:47 -07:00
|
|
|
|
|
|
|
|
let config_path = codex_home.path().join(CONFIG_TOML_FILE);
|
|
|
|
|
let serialized = std::fs::read_to_string(&config_path)?;
|
|
|
|
|
assert_eq!(
|
|
|
|
|
serialized,
|
|
|
|
|
r#"[mcp_servers.docs]
|
|
|
|
|
url = "https://example.com/mcp"
|
|
|
|
|
bearer_token_env_var = "MCP_TOKEN"
|
|
|
|
|
startup_timeout_sec = 2.0
|
|
|
|
|
|
|
|
|
|
[mcp_servers.docs.http_headers]
|
|
|
|
|
X-Doc = "42"
|
|
|
|
|
|
|
|
|
|
[mcp_servers.docs.env_http_headers]
|
|
|
|
|
X-Auth = "DOCS_AUTH"
|
|
|
|
|
"#
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
let loaded = load_global_mcp_servers(codex_home.path()).await?;
|
|
|
|
|
let docs = loaded.get("docs").expect("docs entry");
|
|
|
|
|
match &docs.transport {
|
|
|
|
|
McpServerTransportConfig::StreamableHttp {
|
|
|
|
|
http_headers,
|
|
|
|
|
env_http_headers,
|
|
|
|
|
..
|
|
|
|
|
} => {
|
|
|
|
|
assert_eq!(
|
|
|
|
|
http_headers,
|
|
|
|
|
&Some(HashMap::from([("X-Doc".to_string(), "42".to_string())]))
|
|
|
|
|
);
|
|
|
|
|
assert_eq!(
|
|
|
|
|
env_http_headers,
|
|
|
|
|
&Some(HashMap::from([(
|
|
|
|
|
"X-Auth".to_string(),
|
|
|
|
|
"DOCS_AUTH".to_string()
|
|
|
|
|
)]))
|
|
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
other => panic!("unexpected transport {other:?}"),
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[tokio::test]
|
2025-10-29 20:52:46 +00:00
|
|
|
async fn replace_mcp_servers_streamable_http_removes_optional_sections() -> anyhow::Result<()> {
|
2025-10-16 20:15:47 -07:00
|
|
|
let codex_home = TempDir::new()?;
|
|
|
|
|
|
|
|
|
|
let config_path = codex_home.path().join(CONFIG_TOML_FILE);
|
|
|
|
|
|
|
|
|
|
let mut servers = BTreeMap::from([(
|
|
|
|
|
"docs".to_string(),
|
|
|
|
|
McpServerConfig {
|
|
|
|
|
transport: McpServerTransportConfig::StreamableHttp {
|
|
|
|
|
url: "https://example.com/mcp".to_string(),
|
|
|
|
|
bearer_token_env_var: Some("MCP_TOKEN".to_string()),
|
|
|
|
|
http_headers: Some(HashMap::from([("X-Doc".to_string(), "42".to_string())])),
|
|
|
|
|
env_http_headers: Some(HashMap::from([(
|
|
|
|
|
"X-Auth".to_string(),
|
|
|
|
|
"DOCS_AUTH".to_string(),
|
|
|
|
|
)])),
|
|
|
|
|
},
|
|
|
|
|
enabled: true,
|
|
|
|
|
startup_timeout_sec: Some(Duration::from_secs(2)),
|
|
|
|
|
tool_timeout_sec: None,
|
2025-10-20 15:35:36 -07:00
|
|
|
enabled_tools: None,
|
|
|
|
|
disabled_tools: None,
|
2025-10-16 20:15:47 -07:00
|
|
|
},
|
|
|
|
|
)]);
|
|
|
|
|
|
2025-10-29 20:52:46 +00:00
|
|
|
apply_blocking(
|
|
|
|
|
codex_home.path(),
|
|
|
|
|
None,
|
|
|
|
|
&[ConfigEdit::ReplaceMcpServers(servers.clone())],
|
|
|
|
|
)?;
|
2025-10-16 20:15:47 -07:00
|
|
|
let serialized_with_optional = std::fs::read_to_string(&config_path)?;
|
|
|
|
|
assert!(serialized_with_optional.contains("bearer_token_env_var = \"MCP_TOKEN\""));
|
|
|
|
|
assert!(serialized_with_optional.contains("[mcp_servers.docs.http_headers]"));
|
|
|
|
|
assert!(serialized_with_optional.contains("[mcp_servers.docs.env_http_headers]"));
|
|
|
|
|
|
2025-09-26 18:24:01 -07:00
|
|
|
servers.insert(
|
|
|
|
|
"docs".to_string(),
|
|
|
|
|
McpServerConfig {
|
|
|
|
|
transport: McpServerTransportConfig::StreamableHttp {
|
|
|
|
|
url: "https://example.com/mcp".to_string(),
|
2025-10-07 20:21:37 -07:00
|
|
|
bearer_token_env_var: None,
|
2025-10-16 20:15:47 -07:00
|
|
|
http_headers: None,
|
|
|
|
|
env_http_headers: None,
|
2025-09-26 18:24:01 -07:00
|
|
|
},
|
2025-10-08 13:24:51 -07:00
|
|
|
enabled: true,
|
2025-09-26 18:24:01 -07:00
|
|
|
startup_timeout_sec: None,
|
|
|
|
|
tool_timeout_sec: None,
|
2025-10-20 15:35:36 -07:00
|
|
|
enabled_tools: None,
|
|
|
|
|
disabled_tools: None,
|
2025-09-26 18:24:01 -07:00
|
|
|
},
|
|
|
|
|
);
|
2025-10-29 20:52:46 +00:00
|
|
|
apply_blocking(
|
|
|
|
|
codex_home.path(),
|
|
|
|
|
None,
|
|
|
|
|
&[ConfigEdit::ReplaceMcpServers(servers.clone())],
|
|
|
|
|
)?;
|
2025-09-26 18:24:01 -07:00
|
|
|
|
|
|
|
|
let serialized = std::fs::read_to_string(&config_path)?;
|
|
|
|
|
assert_eq!(
|
|
|
|
|
serialized,
|
|
|
|
|
r#"[mcp_servers.docs]
|
|
|
|
|
url = "https://example.com/mcp"
|
|
|
|
|
"#
|
|
|
|
|
);
|
|
|
|
|
|
2025-10-03 13:02:26 -07:00
|
|
|
let loaded = load_global_mcp_servers(codex_home.path()).await?;
|
2025-09-26 18:24:01 -07:00
|
|
|
let docs = loaded.get("docs").expect("docs entry");
|
|
|
|
|
match &docs.transport {
|
2025-10-07 20:21:37 -07:00
|
|
|
McpServerTransportConfig::StreamableHttp {
|
|
|
|
|
url,
|
|
|
|
|
bearer_token_env_var,
|
2025-10-16 20:15:47 -07:00
|
|
|
http_headers,
|
|
|
|
|
env_http_headers,
|
2025-10-07 20:21:37 -07:00
|
|
|
} => {
|
2025-09-26 18:24:01 -07:00
|
|
|
assert_eq!(url, "https://example.com/mcp");
|
2025-10-07 20:21:37 -07:00
|
|
|
assert!(bearer_token_env_var.is_none());
|
2025-10-16 20:15:47 -07:00
|
|
|
assert!(http_headers.is_none());
|
|
|
|
|
assert!(env_http_headers.is_none());
|
|
|
|
|
}
|
|
|
|
|
other => panic!("unexpected transport {other:?}"),
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
assert!(docs.startup_timeout_sec.is_none());
|
|
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[tokio::test]
|
2025-10-29 20:52:46 +00:00
|
|
|
async fn replace_mcp_servers_streamable_http_isolates_headers_between_servers()
|
2025-10-16 20:15:47 -07:00
|
|
|
-> anyhow::Result<()> {
|
|
|
|
|
let codex_home = TempDir::new()?;
|
|
|
|
|
let config_path = codex_home.path().join(CONFIG_TOML_FILE);
|
|
|
|
|
|
|
|
|
|
let servers = BTreeMap::from([
|
|
|
|
|
(
|
|
|
|
|
"docs".to_string(),
|
|
|
|
|
McpServerConfig {
|
|
|
|
|
transport: McpServerTransportConfig::StreamableHttp {
|
|
|
|
|
url: "https://example.com/mcp".to_string(),
|
|
|
|
|
bearer_token_env_var: Some("MCP_TOKEN".to_string()),
|
|
|
|
|
http_headers: Some(HashMap::from([(
|
|
|
|
|
"X-Doc".to_string(),
|
|
|
|
|
"42".to_string(),
|
|
|
|
|
)])),
|
|
|
|
|
env_http_headers: Some(HashMap::from([(
|
|
|
|
|
"X-Auth".to_string(),
|
|
|
|
|
"DOCS_AUTH".to_string(),
|
|
|
|
|
)])),
|
|
|
|
|
},
|
|
|
|
|
enabled: true,
|
|
|
|
|
startup_timeout_sec: Some(Duration::from_secs(2)),
|
|
|
|
|
tool_timeout_sec: None,
|
2025-10-20 15:35:36 -07:00
|
|
|
enabled_tools: None,
|
|
|
|
|
disabled_tools: None,
|
2025-10-16 20:15:47 -07:00
|
|
|
},
|
|
|
|
|
),
|
|
|
|
|
(
|
|
|
|
|
"logs".to_string(),
|
|
|
|
|
McpServerConfig {
|
|
|
|
|
transport: McpServerTransportConfig::Stdio {
|
|
|
|
|
command: "logs-server".to_string(),
|
|
|
|
|
args: vec!["--follow".to_string()],
|
|
|
|
|
env: None,
|
2025-10-16 21:24:43 -07:00
|
|
|
env_vars: Vec::new(),
|
|
|
|
|
cwd: None,
|
2025-10-16 20:15:47 -07:00
|
|
|
},
|
|
|
|
|
enabled: true,
|
|
|
|
|
startup_timeout_sec: None,
|
|
|
|
|
tool_timeout_sec: None,
|
2025-10-20 15:35:36 -07:00
|
|
|
enabled_tools: None,
|
|
|
|
|
disabled_tools: None,
|
2025-10-16 20:15:47 -07:00
|
|
|
},
|
|
|
|
|
),
|
|
|
|
|
]);
|
|
|
|
|
|
2025-10-29 20:52:46 +00:00
|
|
|
apply_blocking(
|
|
|
|
|
codex_home.path(),
|
|
|
|
|
None,
|
|
|
|
|
&[ConfigEdit::ReplaceMcpServers(servers.clone())],
|
|
|
|
|
)?;
|
2025-10-16 20:15:47 -07:00
|
|
|
|
|
|
|
|
let serialized = std::fs::read_to_string(&config_path)?;
|
|
|
|
|
assert!(
|
|
|
|
|
serialized.contains("[mcp_servers.docs.http_headers]"),
|
|
|
|
|
"serialized config missing docs headers section:\n{serialized}"
|
|
|
|
|
);
|
|
|
|
|
assert!(
|
|
|
|
|
!serialized.contains("[mcp_servers.logs.http_headers]"),
|
|
|
|
|
"serialized config should not add logs headers section:\n{serialized}"
|
|
|
|
|
);
|
|
|
|
|
assert!(
|
|
|
|
|
!serialized.contains("[mcp_servers.logs.env_http_headers]"),
|
|
|
|
|
"serialized config should not add logs env headers section:\n{serialized}"
|
|
|
|
|
);
|
|
|
|
|
assert!(
|
|
|
|
|
!serialized.contains("mcp_servers.logs.bearer_token_env_var"),
|
|
|
|
|
"serialized config should not add bearer token to logs:\n{serialized}"
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
let loaded = load_global_mcp_servers(codex_home.path()).await?;
|
|
|
|
|
let docs = loaded.get("docs").expect("docs entry");
|
|
|
|
|
match &docs.transport {
|
|
|
|
|
McpServerTransportConfig::StreamableHttp {
|
|
|
|
|
http_headers,
|
|
|
|
|
env_http_headers,
|
|
|
|
|
..
|
|
|
|
|
} => {
|
|
|
|
|
assert_eq!(
|
|
|
|
|
http_headers,
|
|
|
|
|
&Some(HashMap::from([("X-Doc".to_string(), "42".to_string())]))
|
|
|
|
|
);
|
|
|
|
|
assert_eq!(
|
|
|
|
|
env_http_headers,
|
|
|
|
|
&Some(HashMap::from([(
|
|
|
|
|
"X-Auth".to_string(),
|
|
|
|
|
"DOCS_AUTH".to_string()
|
|
|
|
|
)]))
|
|
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
other => panic!("unexpected transport {other:?}"),
|
|
|
|
|
}
|
|
|
|
|
let logs = loaded.get("logs").expect("logs entry");
|
|
|
|
|
match &logs.transport {
|
|
|
|
|
McpServerTransportConfig::Stdio { env, .. } => {
|
|
|
|
|
assert!(env.is_none());
|
2025-09-26 18:24:01 -07:00
|
|
|
}
|
|
|
|
|
other => panic!("unexpected transport {other:?}"),
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
2025-10-08 13:24:51 -07:00
|
|
|
#[tokio::test]
|
2025-10-29 20:52:46 +00:00
|
|
|
async fn replace_mcp_servers_serializes_disabled_flag() -> anyhow::Result<()> {
|
2025-10-08 13:24:51 -07:00
|
|
|
let codex_home = TempDir::new()?;
|
|
|
|
|
|
|
|
|
|
let servers = BTreeMap::from([(
|
|
|
|
|
"docs".to_string(),
|
|
|
|
|
McpServerConfig {
|
|
|
|
|
transport: McpServerTransportConfig::Stdio {
|
|
|
|
|
command: "docs-server".to_string(),
|
|
|
|
|
args: Vec::new(),
|
|
|
|
|
env: None,
|
2025-10-16 21:24:43 -07:00
|
|
|
env_vars: Vec::new(),
|
|
|
|
|
cwd: None,
|
2025-10-08 13:24:51 -07:00
|
|
|
},
|
|
|
|
|
enabled: false,
|
|
|
|
|
startup_timeout_sec: None,
|
|
|
|
|
tool_timeout_sec: None,
|
2025-10-20 15:35:36 -07:00
|
|
|
enabled_tools: None,
|
|
|
|
|
disabled_tools: None,
|
2025-10-08 13:24:51 -07:00
|
|
|
},
|
|
|
|
|
)]);
|
|
|
|
|
|
2025-10-29 20:52:46 +00:00
|
|
|
apply_blocking(
|
|
|
|
|
codex_home.path(),
|
|
|
|
|
None,
|
|
|
|
|
&[ConfigEdit::ReplaceMcpServers(servers.clone())],
|
|
|
|
|
)?;
|
2025-10-08 13:24:51 -07:00
|
|
|
|
|
|
|
|
let config_path = codex_home.path().join(CONFIG_TOML_FILE);
|
|
|
|
|
let serialized = std::fs::read_to_string(&config_path)?;
|
|
|
|
|
assert!(
|
|
|
|
|
serialized.contains("enabled = false"),
|
|
|
|
|
"serialized config missing disabled flag:\n{serialized}"
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
let loaded = load_global_mcp_servers(codex_home.path()).await?;
|
|
|
|
|
let docs = loaded.get("docs").expect("docs entry");
|
|
|
|
|
assert!(!docs.enabled);
|
|
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
2025-10-20 15:35:36 -07:00
|
|
|
#[tokio::test]
|
2025-10-29 20:52:46 +00:00
|
|
|
async fn replace_mcp_servers_serializes_tool_filters() -> anyhow::Result<()> {
|
2025-10-20 15:35:36 -07:00
|
|
|
let codex_home = TempDir::new()?;
|
|
|
|
|
|
|
|
|
|
let servers = BTreeMap::from([(
|
|
|
|
|
"docs".to_string(),
|
|
|
|
|
McpServerConfig {
|
|
|
|
|
transport: McpServerTransportConfig::Stdio {
|
|
|
|
|
command: "docs-server".to_string(),
|
|
|
|
|
args: Vec::new(),
|
|
|
|
|
env: None,
|
|
|
|
|
env_vars: Vec::new(),
|
|
|
|
|
cwd: None,
|
|
|
|
|
},
|
|
|
|
|
enabled: true,
|
|
|
|
|
startup_timeout_sec: None,
|
|
|
|
|
tool_timeout_sec: None,
|
|
|
|
|
enabled_tools: Some(vec!["allowed".to_string()]),
|
|
|
|
|
disabled_tools: Some(vec!["blocked".to_string()]),
|
|
|
|
|
},
|
|
|
|
|
)]);
|
|
|
|
|
|
2025-10-29 20:52:46 +00:00
|
|
|
apply_blocking(
|
|
|
|
|
codex_home.path(),
|
|
|
|
|
None,
|
|
|
|
|
&[ConfigEdit::ReplaceMcpServers(servers.clone())],
|
|
|
|
|
)?;
|
2025-10-20 15:35:36 -07:00
|
|
|
|
|
|
|
|
let config_path = codex_home.path().join(CONFIG_TOML_FILE);
|
|
|
|
|
let serialized = std::fs::read_to_string(&config_path)?;
|
|
|
|
|
assert!(serialized.contains(r#"enabled_tools = ["allowed"]"#));
|
|
|
|
|
assert!(serialized.contains(r#"disabled_tools = ["blocked"]"#));
|
|
|
|
|
|
|
|
|
|
let loaded = load_global_mcp_servers(codex_home.path()).await?;
|
|
|
|
|
let docs = loaded.get("docs").expect("docs entry");
|
|
|
|
|
assert_eq!(
|
|
|
|
|
docs.enabled_tools.as_ref(),
|
|
|
|
|
Some(&vec!["allowed".to_string()])
|
|
|
|
|
);
|
|
|
|
|
assert_eq!(
|
|
|
|
|
docs.disabled_tools.as_ref(),
|
|
|
|
|
Some(&vec!["blocked".to_string()])
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
2025-09-11 15:04:29 -07:00
|
|
|
#[tokio::test]
|
2025-10-29 20:52:46 +00:00
|
|
|
async fn set_model_updates_defaults() -> anyhow::Result<()> {
|
2025-09-11 15:04:29 -07:00
|
|
|
let codex_home = TempDir::new()?;
|
|
|
|
|
|
2025-10-29 20:52:46 +00:00
|
|
|
ConfigEditsBuilder::new(codex_home.path())
|
|
|
|
|
.set_model(Some("gpt-5-codex"), Some(ReasoningEffort::High))
|
|
|
|
|
.apply()
|
|
|
|
|
.await?;
|
2025-09-11 15:04:29 -07:00
|
|
|
|
|
|
|
|
let serialized =
|
|
|
|
|
tokio::fs::read_to_string(codex_home.path().join(CONFIG_TOML_FILE)).await?;
|
|
|
|
|
let parsed: ConfigToml = toml::from_str(&serialized)?;
|
|
|
|
|
|
2025-09-15 08:17:13 -07:00
|
|
|
assert_eq!(parsed.model.as_deref(), Some("gpt-5-codex"));
|
2025-09-11 15:04:29 -07:00
|
|
|
assert_eq!(parsed.model_reasoning_effort, Some(ReasoningEffort::High));
|
|
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[tokio::test]
|
2025-10-29 20:52:46 +00:00
|
|
|
async fn set_model_overwrites_existing_model() -> anyhow::Result<()> {
|
2025-09-11 15:04:29 -07:00
|
|
|
let codex_home = TempDir::new()?;
|
|
|
|
|
let config_path = codex_home.path().join(CONFIG_TOML_FILE);
|
|
|
|
|
|
|
|
|
|
tokio::fs::write(
|
|
|
|
|
&config_path,
|
|
|
|
|
r#"
|
2025-09-22 20:10:52 -07:00
|
|
|
model = "gpt-5-codex"
|
2025-09-11 15:04:29 -07:00
|
|
|
model_reasoning_effort = "medium"
|
|
|
|
|
|
|
|
|
|
[profiles.dev]
|
|
|
|
|
model = "gpt-4.1"
|
|
|
|
|
"#,
|
|
|
|
|
)
|
|
|
|
|
.await?;
|
|
|
|
|
|
2025-10-29 20:52:46 +00:00
|
|
|
ConfigEditsBuilder::new(codex_home.path())
|
|
|
|
|
.set_model(Some("o4-mini"), Some(ReasoningEffort::High))
|
|
|
|
|
.apply()
|
|
|
|
|
.await?;
|
2025-09-11 15:04:29 -07:00
|
|
|
|
|
|
|
|
let serialized = tokio::fs::read_to_string(config_path).await?;
|
|
|
|
|
let parsed: ConfigToml = toml::from_str(&serialized)?;
|
|
|
|
|
|
|
|
|
|
assert_eq!(parsed.model.as_deref(), Some("o4-mini"));
|
|
|
|
|
assert_eq!(parsed.model_reasoning_effort, Some(ReasoningEffort::High));
|
|
|
|
|
assert_eq!(
|
|
|
|
|
parsed
|
|
|
|
|
.profiles
|
|
|
|
|
.get("dev")
|
|
|
|
|
.and_then(|profile| profile.model.as_deref()),
|
|
|
|
|
Some("gpt-4.1"),
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[tokio::test]
|
2025-10-29 20:52:46 +00:00
|
|
|
async fn set_model_updates_profile() -> anyhow::Result<()> {
|
2025-09-11 15:04:29 -07:00
|
|
|
let codex_home = TempDir::new()?;
|
|
|
|
|
|
2025-10-29 20:52:46 +00:00
|
|
|
ConfigEditsBuilder::new(codex_home.path())
|
|
|
|
|
.with_profile(Some("dev"))
|
|
|
|
|
.set_model(Some("gpt-5-codex"), Some(ReasoningEffort::Medium))
|
|
|
|
|
.apply()
|
|
|
|
|
.await?;
|
2025-09-11 15:04:29 -07:00
|
|
|
|
|
|
|
|
let serialized =
|
|
|
|
|
tokio::fs::read_to_string(codex_home.path().join(CONFIG_TOML_FILE)).await?;
|
|
|
|
|
let parsed: ConfigToml = toml::from_str(&serialized)?;
|
|
|
|
|
let profile = parsed
|
|
|
|
|
.profiles
|
|
|
|
|
.get("dev")
|
|
|
|
|
.expect("profile should be created");
|
|
|
|
|
|
2025-09-15 08:17:13 -07:00
|
|
|
assert_eq!(profile.model.as_deref(), Some("gpt-5-codex"));
|
2025-09-12 22:44:05 -07:00
|
|
|
assert_eq!(
|
|
|
|
|
profile.model_reasoning_effort,
|
|
|
|
|
Some(ReasoningEffort::Medium)
|
|
|
|
|
);
|
2025-09-11 15:04:29 -07:00
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[tokio::test]
|
2025-10-29 20:52:46 +00:00
|
|
|
async fn set_model_updates_existing_profile() -> anyhow::Result<()> {
|
2025-09-11 15:04:29 -07:00
|
|
|
let codex_home = TempDir::new()?;
|
|
|
|
|
let config_path = codex_home.path().join(CONFIG_TOML_FILE);
|
|
|
|
|
|
|
|
|
|
tokio::fs::write(
|
|
|
|
|
&config_path,
|
|
|
|
|
r#"
|
|
|
|
|
[profiles.dev]
|
|
|
|
|
model = "gpt-4"
|
|
|
|
|
model_reasoning_effort = "medium"
|
|
|
|
|
|
|
|
|
|
[profiles.prod]
|
2025-09-22 20:10:52 -07:00
|
|
|
model = "gpt-5-codex"
|
2025-09-11 15:04:29 -07:00
|
|
|
"#,
|
|
|
|
|
)
|
|
|
|
|
.await?;
|
|
|
|
|
|
2025-10-29 20:52:46 +00:00
|
|
|
ConfigEditsBuilder::new(codex_home.path())
|
|
|
|
|
.with_profile(Some("dev"))
|
|
|
|
|
.set_model(Some("o4-high"), Some(ReasoningEffort::Medium))
|
|
|
|
|
.apply()
|
|
|
|
|
.await?;
|
2025-09-11 15:04:29 -07:00
|
|
|
|
|
|
|
|
let serialized = tokio::fs::read_to_string(config_path).await?;
|
|
|
|
|
let parsed: ConfigToml = toml::from_str(&serialized)?;
|
|
|
|
|
|
|
|
|
|
let dev_profile = parsed
|
|
|
|
|
.profiles
|
|
|
|
|
.get("dev")
|
|
|
|
|
.expect("dev profile should survive updates");
|
|
|
|
|
assert_eq!(dev_profile.model.as_deref(), Some("o4-high"));
|
|
|
|
|
assert_eq!(
|
|
|
|
|
dev_profile.model_reasoning_effort,
|
|
|
|
|
Some(ReasoningEffort::Medium)
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
assert_eq!(
|
|
|
|
|
parsed
|
|
|
|
|
.profiles
|
|
|
|
|
.get("prod")
|
|
|
|
|
.and_then(|profile| profile.model.as_deref()),
|
2025-09-22 20:10:52 -07:00
|
|
|
Some("gpt-5-codex"),
|
2025-09-11 15:04:29 -07:00
|
|
|
);
|
|
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
2025-05-15 00:30:13 -07:00
|
|
|
struct PrecedenceTestFixture {
|
|
|
|
|
cwd: TempDir,
|
|
|
|
|
codex_home: TempDir,
|
|
|
|
|
cfg: ConfigToml,
|
|
|
|
|
model_provider_map: HashMap<String, ModelProviderInfo>,
|
|
|
|
|
openai_provider: ModelProviderInfo,
|
|
|
|
|
openai_chat_completions_provider: ModelProviderInfo,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl PrecedenceTestFixture {
|
|
|
|
|
fn cwd(&self) -> PathBuf {
|
|
|
|
|
self.cwd.path().to_path_buf()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn codex_home(&self) -> PathBuf {
|
|
|
|
|
self.codex_home.path().to_path_buf()
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn create_test_fixture() -> std::io::Result<PrecedenceTestFixture> {
|
2025-05-13 16:52:52 -07:00
|
|
|
let toml = r#"
|
|
|
|
|
model = "o3"
|
2025-06-24 22:19:21 -07:00
|
|
|
approval_policy = "untrusted"
|
2025-05-13 16:52:52 -07:00
|
|
|
|
|
|
|
|
# Can be used to determine which profile to use if not specified by
|
|
|
|
|
# `ConfigOverrides`.
|
|
|
|
|
profile = "gpt3"
|
|
|
|
|
|
|
|
|
|
[model_providers.openai-chat-completions]
|
|
|
|
|
name = "OpenAI using Chat Completions"
|
|
|
|
|
base_url = "https://api.openai.com/v1"
|
|
|
|
|
env_key = "OPENAI_API_KEY"
|
|
|
|
|
wire_api = "chat"
|
2025-07-18 12:12:39 -07:00
|
|
|
request_max_retries = 4 # retry failed HTTP requests
|
|
|
|
|
stream_max_retries = 10 # retry dropped SSE streams
|
|
|
|
|
stream_idle_timeout_ms = 300000 # 5m idle timeout
|
2025-05-13 16:52:52 -07:00
|
|
|
|
|
|
|
|
[profiles.o3]
|
|
|
|
|
model = "o3"
|
|
|
|
|
model_provider = "openai"
|
|
|
|
|
approval_policy = "never"
|
2025-07-08 22:05:22 +03:00
|
|
|
model_reasoning_effort = "high"
|
|
|
|
|
model_reasoning_summary = "detailed"
|
2025-05-13 16:52:52 -07:00
|
|
|
|
|
|
|
|
[profiles.gpt3]
|
|
|
|
|
model = "gpt-3.5-turbo"
|
|
|
|
|
model_provider = "openai-chat-completions"
|
|
|
|
|
|
|
|
|
|
[profiles.zdr]
|
|
|
|
|
model = "o3"
|
|
|
|
|
model_provider = "openai"
|
|
|
|
|
approval_policy = "on-failure"
|
2025-09-03 12:20:31 -07:00
|
|
|
|
|
|
|
|
[profiles.gpt5]
|
|
|
|
|
model = "gpt-5"
|
|
|
|
|
model_provider = "openai"
|
|
|
|
|
approval_policy = "on-failure"
|
|
|
|
|
model_reasoning_effort = "high"
|
|
|
|
|
model_reasoning_summary = "detailed"
|
|
|
|
|
model_verbosity = "high"
|
2025-05-13 16:52:52 -07:00
|
|
|
"#;
|
|
|
|
|
|
|
|
|
|
let cfg: ConfigToml = toml::from_str(toml).expect("TOML deserialization should succeed");
|
|
|
|
|
|
|
|
|
|
// Use a temporary directory for the cwd so it does not contain an
|
|
|
|
|
// AGENTS.md file.
|
|
|
|
|
let cwd_temp_dir = TempDir::new().unwrap();
|
|
|
|
|
let cwd = cwd_temp_dir.path().to_path_buf();
|
|
|
|
|
// Make it look like a Git repo so it does not search for AGENTS.md in
|
|
|
|
|
// a parent folder, either.
|
|
|
|
|
std::fs::write(cwd.join(".git"), "gitdir: nowhere")?;
|
|
|
|
|
|
2025-05-15 00:30:13 -07:00
|
|
|
let codex_home_temp_dir = TempDir::new().unwrap();
|
|
|
|
|
|
2025-05-13 16:52:52 -07:00
|
|
|
let openai_chat_completions_provider = ModelProviderInfo {
|
|
|
|
|
name: "OpenAI using Chat Completions".to_string(),
|
2025-07-30 12:40:15 -07:00
|
|
|
base_url: Some("https://api.openai.com/v1".to_string()),
|
2025-05-13 16:52:52 -07:00
|
|
|
env_key: Some("OPENAI_API_KEY".to_string()),
|
|
|
|
|
wire_api: crate::WireApi::Chat,
|
|
|
|
|
env_key_instructions: None,
|
2025-10-21 14:02:56 -07:00
|
|
|
experimental_bearer_token: None,
|
2025-06-30 11:39:54 -07:00
|
|
|
query_params: None,
|
2025-07-07 13:09:16 -07:00
|
|
|
http_headers: None,
|
|
|
|
|
env_http_headers: None,
|
2025-07-18 12:12:39 -07:00
|
|
|
request_max_retries: Some(4),
|
|
|
|
|
stream_max_retries: Some(10),
|
|
|
|
|
stream_idle_timeout_ms: Some(300_000),
|
2025-08-06 13:02:00 -07:00
|
|
|
requires_openai_auth: false,
|
2025-05-13 16:52:52 -07:00
|
|
|
};
|
|
|
|
|
let model_provider_map = {
|
|
|
|
|
let mut model_provider_map = built_in_model_providers();
|
|
|
|
|
model_provider_map.insert(
|
|
|
|
|
"openai-chat-completions".to_string(),
|
|
|
|
|
openai_chat_completions_provider.clone(),
|
|
|
|
|
);
|
|
|
|
|
model_provider_map
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
let openai_provider = model_provider_map
|
|
|
|
|
.get("openai")
|
|
|
|
|
.expect("openai provider should exist")
|
|
|
|
|
.clone();
|
|
|
|
|
|
2025-05-15 00:30:13 -07:00
|
|
|
Ok(PrecedenceTestFixture {
|
|
|
|
|
cwd: cwd_temp_dir,
|
|
|
|
|
codex_home: codex_home_temp_dir,
|
|
|
|
|
cfg,
|
|
|
|
|
model_provider_map,
|
|
|
|
|
openai_provider,
|
|
|
|
|
openai_chat_completions_provider,
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Users can specify config values at multiple levels that have the
/// following precedence:
///
/// 1. custom command-line argument, e.g. `--model o3`
/// 2. as part of a profile, where the `--profile` is specified via a CLI
///    (or in the config file itself)
/// 3. as an entry in `config.toml`, e.g. `model = "o3"`
/// 4. the default value for a required field defined in code, e.g.,
///    `crate::flags::OPENAI_DEFAULT_MODEL`
///
/// Note that profiles are the recommended way to specify a group of
/// configuration options together.
#[test]
fn test_precedence_fixture_with_o3_profile() -> std::io::Result<()> {
    let fixture = create_test_fixture()?;

    // Select the o3 profile via overrides; its values should win over the
    // top-level entries in the fixture's config.toml.
    let o3_profile_overrides = ConfigOverrides {
        config_profile: Some("o3".to_string()),
        cwd: Some(fixture.cwd()),
        ..Default::default()
    };
    let o3_profile_config: Config = Config::load_from_base_config_with_overrides(
        fixture.cfg.clone(),
        o3_profile_overrides,
        fixture.codex_home(),
    )?;
    // Compare against a fully-spelled-out Config so any new field must be
    // accounted for here explicitly.
    assert_eq!(
        Config {
            model: "o3".to_string(),
            review_model: OPENAI_DEFAULT_REVIEW_MODEL.to_string(),
            model_family: find_family_for_model("o3").expect("known model slug"),
            model_context_window: Some(200_000),
            model_max_output_tokens: Some(100_000),
            model_auto_compact_token_limit: Some(180_000),
            model_provider_id: "openai".to_string(),
            model_provider: fixture.openai_provider.clone(),
            approval_policy: AskForApproval::Never,
            sandbox_policy: SandboxPolicy::new_read_only_policy(),
            did_user_set_custom_approval_policy_or_sandbox_mode: true,
            forced_auto_mode_downgraded_on_windows: false,
            shell_environment_policy: ShellEnvironmentPolicy::default(),
            user_instructions: None,
            notify: None,
            cwd: fixture.cwd(),
            cli_auth_credentials_store_mode: Default::default(),
            mcp_servers: HashMap::new(),
            mcp_oauth_credentials_store_mode: Default::default(),
            model_providers: fixture.model_provider_map.clone(),
            project_doc_max_bytes: PROJECT_DOC_MAX_BYTES,
            project_doc_fallback_filenames: Vec::new(),
            codex_home: fixture.codex_home(),
            history: History::default(),
            file_opener: UriBasedFileOpener::VsCode,
            codex_linux_sandbox_exe: None,
            hide_agent_reasoning: false,
            show_raw_agent_reasoning: false,
            model_reasoning_effort: Some(ReasoningEffort::High),
            model_reasoning_summary: ReasoningSummary::Detailed,
            model_verbosity: None,
            chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
            base_instructions: None,
            forced_chatgpt_workspace_id: None,
            forced_login_method: None,
            include_apply_patch_tool: false,
            tools_web_search_request: false,
            experimental_sandbox_command_assessment: false,
            use_experimental_streamable_shell_tool: false,
            use_experimental_unified_exec_tool: false,
            use_experimental_use_rmcp_client: false,
            include_view_image_tool: true,
            features: Features::with_defaults(),
            active_profile: Some("o3".to_string()),
            active_project: ProjectConfig { trust_level: None },
            windows_wsl_setup_acknowledged: false,
            notices: Default::default(),
            disable_paste_burst: false,
            tui_notifications: Default::default(),
            otel: OtelConfig::default(),
        },
        o3_profile_config
    );
    Ok(())
}
|
|
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
|
fn test_precedence_fixture_with_gpt3_profile() -> std::io::Result<()> {
|
|
|
|
|
let fixture = create_test_fixture()?;
|
2025-05-13 16:52:52 -07:00
|
|
|
|
|
|
|
|
let gpt3_profile_overrides = ConfigOverrides {
|
|
|
|
|
config_profile: Some("gpt3".to_string()),
|
2025-05-15 00:30:13 -07:00
|
|
|
cwd: Some(fixture.cwd()),
|
2025-05-13 16:52:52 -07:00
|
|
|
..Default::default()
|
|
|
|
|
};
|
|
|
|
|
let gpt3_profile_config = Config::load_from_base_config_with_overrides(
|
2025-05-15 00:30:13 -07:00
|
|
|
fixture.cfg.clone(),
|
2025-05-13 16:52:52 -07:00
|
|
|
gpt3_profile_overrides,
|
2025-05-15 00:30:13 -07:00
|
|
|
fixture.codex_home(),
|
2025-05-13 16:52:52 -07:00
|
|
|
)?;
|
|
|
|
|
let expected_gpt3_profile_config = Config {
|
|
|
|
|
model: "gpt-3.5-turbo".to_string(),
|
2025-09-16 13:36:51 -07:00
|
|
|
review_model: OPENAI_DEFAULT_REVIEW_MODEL.to_string(),
|
2025-08-04 23:50:03 -07:00
|
|
|
model_family: find_family_for_model("gpt-3.5-turbo").expect("known model slug"),
|
feat: show number of tokens remaining in UI (#1388)
When using the OpenAI Responses API, we now record the `usage` field for
a `"response.completed"` event, which includes metrics about the number
of tokens consumed. We also introduce `openai_model_info.rs`, which
includes current data about the most common OpenAI models available via
the API (specifically `context_window` and `max_output_tokens`). If
Codex does not recognize the model, you can set `model_context_window`
and `model_max_output_tokens` explicitly in `config.toml`.
When then introduce a new event type to `protocol.rs`, `TokenCount`,
which includes the `TokenUsage` for the most recent turn.
Finally, we update the TUI to record the running sum of tokens used so
the percentage of available context window remaining can be reported via
the placeholder text for the composer:

We could certainly get much fancier with this (such as reporting the
estimated cost of the conversation), but for now, we are just trying to
achieve feature parity with the TypeScript CLI.
Though arguably this improves upon the TypeScript CLI, as the TypeScript
CLI uses heuristics to estimate the number of tokens used rather than
using the `usage` information directly:
https://github.com/openai/codex/blob/296996d74e345b1b05d8c3451a06ace21c5ada96/codex-cli/src/utils/approximate-tokens-used.ts#L3-L16
Fixes https://github.com/openai/codex/issues/1242
2025-06-25 23:31:11 -07:00
|
|
|
model_context_window: Some(16_385),
|
|
|
|
|
model_max_output_tokens: Some(4_096),
|
2025-10-20 11:29:49 -07:00
|
|
|
model_auto_compact_token_limit: Some(14_746),
|
2025-05-13 16:52:52 -07:00
|
|
|
model_provider_id: "openai-chat-completions".to_string(),
|
2025-05-15 00:30:13 -07:00
|
|
|
model_provider: fixture.openai_chat_completions_provider.clone(),
|
2025-06-25 12:26:13 -07:00
|
|
|
approval_policy: AskForApproval::UnlessTrusted,
|
2025-05-13 16:52:52 -07:00
|
|
|
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
2025-10-16 11:23:38 -07:00
|
|
|
did_user_set_custom_approval_policy_or_sandbox_mode: true,
|
2025-10-27 18:19:32 -07:00
|
|
|
forced_auto_mode_downgraded_on_windows: false,
|
feat: introduce support for shell_environment_policy in config.toml (#1061)
To date, when handling `shell` and `local_shell` tool calls, we were
spawning new processes using the environment inherited from the Codex
process itself. This means that the sensitive `OPENAI_API_KEY` that
Codex needs to talk to OpenAI models was made available to everything
run by `shell` and `local_shell`. While there are cases where that might
be useful, it does not seem like a good default.
This PR introduces a complex `shell_environment_policy` config option to
control the `env` used with these tool calls. It is inevitably a bit
complex so that it is possible to override individual components of the
policy so without having to restate the entire thing.
Details are in the updated `README.md` in this PR, but here is the
relevant bit that explains the individual fields of
`shell_environment_policy`:
| Field | Type | Default | Description |
| ------------------------- | -------------------------- | ------- |
-----------------------------------------------------------------------------------------------------------------------------------------------
|
| `inherit` | string | `core` | Starting template for the
environment:<br>`core` (`HOME`, `PATH`, `USER`, …), `all` (clone full
parent env), or `none` (start empty). |
| `ignore_default_excludes` | boolean | `false` | When `false`, Codex
removes any var whose **name** contains `KEY`, `SECRET`, or `TOKEN`
(case-insensitive) before other rules run. |
| `exclude` | array<string> | `[]` | Case-insensitive glob
patterns to drop after the default filter.<br>Examples: `"AWS_*"`,
`"AZURE_*"`. |
| `set` | table<string,string> | `{}` | Explicit key/value
overrides or additions – always win over inherited values. |
| `include_only` | array<string> | `[]` | If non-empty, a
whitelist of patterns; only variables that match _one_ pattern survive
the final step. (Generally used with `inherit = "all"`.) |
In particular, note that the default is `inherit = "core"`, so:
* if you have extra env variables that you want to inherit from the
parent process, use `inherit = "all"` and then specify `include_only`
* if you have extra env variables where you want to hardcode the values,
the default `inherit = "core"` will work fine, but then you need to
specify `set`
This configuration is not battle-tested, so we will probably still have
to play with it a bit. `core/src/exec_env.rs` has the critical business
logic as well as unit tests.
Though if nothing else, previous to this change:
```
$ cargo run --bin codex -- debug seatbelt -- printenv OPENAI_API_KEY
# ...prints OPENAI_API_KEY...
```
But after this change it does not print anything (as desired).
One final thing to call out about this PR is that the
`configure_command!` macro we use in `core/src/exec.rs` has to do some
complex logic with respect to how it builds up the `env` for the process
being spawned under Landlock/seccomp. Specifically, doing
`cmd.env_clear()` followed by `cmd.envs(&$env_map)` (which is arguably
the most intuitive way to do it) caused the Landlock unit tests to fail
because the processes spawned by the unit tests started failing in
unexpected ways! If we forgo `env_clear()` in favor of updating env vars
one at a time, the tests still pass. The comment in the code talks about
this a bit, and while I would like to investigate this more, I need to
move on for the moment, but I do plan to come back to it to fully
understand what is going on. For example, this suggests that we might
not be able to spawn a C program that calls `env_clear()`, which would
be...weird. We may still have to fiddle with our Landlock config if that
is the case.
2025-05-22 09:51:19 -07:00
|
|
|
shell_environment_policy: ShellEnvironmentPolicy::default(),
|
2025-07-22 09:42:22 -07:00
|
|
|
user_instructions: None,
|
2025-05-13 16:52:52 -07:00
|
|
|
notify: None,
|
2025-05-15 00:30:13 -07:00
|
|
|
cwd: fixture.cwd(),
|
2025-10-27 19:41:49 -07:00
|
|
|
cli_auth_credentials_store_mode: Default::default(),
|
2025-05-13 16:52:52 -07:00
|
|
|
mcp_servers: HashMap::new(),
|
2025-10-07 19:39:32 -07:00
|
|
|
mcp_oauth_credentials_store_mode: Default::default(),
|
2025-05-15 00:30:13 -07:00
|
|
|
model_providers: fixture.model_provider_map.clone(),
|
2025-05-13 16:52:52 -07:00
|
|
|
project_doc_max_bytes: PROJECT_DOC_MAX_BYTES,
|
2025-10-01 11:19:59 -07:00
|
|
|
project_doc_fallback_filenames: Vec::new(),
|
2025-05-15 00:30:13 -07:00
|
|
|
codex_home: fixture.codex_home(),
|
feat: record messages from user in ~/.codex/history.jsonl (#939)
This is a large change to support a "history" feature like you would
expect in a shell like Bash.
History events are recorded in `$CODEX_HOME/history.jsonl`. Because it
is a JSONL file, it is straightforward to append new entries (as opposed
to the TypeScript file that uses `$CODEX_HOME/history.json`, so to be
valid JSON, each new entry entails rewriting the entire file). Because
it is possible for there to be multiple instances of Codex CLI writing
to `history.jsonl` at once, we use advisory file locking when working
with `history.jsonl` in `codex-rs/core/src/message_history.rs`.
Because we believe history is a sufficiently useful feature, we enable
it by default. Though to provide some safety, we set the file
permissions of `history.jsonl` to be `o600` so that other users on the
system cannot read the user's history. We do not yet support a default
list of `SENSITIVE_PATTERNS` as the TypeScript CLI does:
https://github.com/openai/codex/blob/3fdf9df1335ac9501e3fb0e61715359145711e8b/codex-cli/src/utils/storage/command-history.ts#L10-L17
We are going to take a more conservative approach to this list in the
Rust CLI. For example, while `/\b[A-Za-z0-9-_]{20,}\b/` might exclude
sensitive information like API tokens, it would also exclude valuable
information such as references to Git commits.
As noted in the updated documentation, users can opt-out of history by
adding the following to `config.toml`:
```toml
[history]
persistence = "none"
```
Because `history.jsonl` could, in theory, be quite large, we take a[n
arguably overly pedantic] approach in reading history entries into
memory. Specifically, we start by telling the client the current number
of entries in the history file (`history_entry_count`) as well as the
inode (`history_log_id`) of `history.jsonl` (see the new fields on
`SessionConfiguredEvent`).
The client is responsible for keeping new entries in memory to create a
"local history," but if the user hits up enough times to go "past" the
end of local history, then the client should use the new
`GetHistoryEntryRequest` in the protocol to fetch older entries.
Specifically, it should pass the `history_log_id` it was given
originally and work backwards from `history_entry_count`. (It should
really fetch history in batches rather than one-at-a-time, but that is
something we can improve upon in subsequent PRs.)
The motivation behind this crazy scheme is that it is designed to defend
against:
* The `history.jsonl` being truncated during the session such that the
index into the history is no longer consistent with what had been read
up to that point. We do not yet have logic to enforce a `max_bytes` for
`history.jsonl`, but once we do, we will aspire to implement it in a way
that should result in a new inode for the file on most systems.
* New items from concurrent Codex CLI sessions amending to the history.
Because, in absence of truncation, `history.jsonl` is an append-only
log, so long as the client reads backwards from `history_entry_count`,
it should always get a consistent view of history. (That said, it will
not be able to read _new_ commands from concurrent sessions, but perhaps
we will introduce a `/` command to reload latest history or something
down the road.)
Admittedly, my testing of this feature thus far has been fairly light. I
expect we will find bugs and introduce enhancements/fixes going forward.
2025-05-15 16:26:23 -07:00
|
|
|
history: History::default(),
|
2025-05-16 11:33:08 -07:00
|
|
|
file_opener: UriBasedFileOpener::VsCode,
|
2025-05-22 21:52:28 -07:00
|
|
|
codex_linux_sandbox_exe: None,
|
2025-05-30 23:14:56 -07:00
|
|
|
hide_agent_reasoning: false,
|
2025-08-05 01:56:13 -07:00
|
|
|
show_raw_agent_reasoning: false,
|
2025-09-12 12:06:33 -07:00
|
|
|
model_reasoning_effort: None,
|
feat: make reasoning effort/summaries configurable (#1199)
Previous to this PR, we always set `reasoning` when making a request
using the Responses API:
https://github.com/openai/codex/blob/d7245cbbc9d8ff5446da45e5951761103492476d/codex-rs/core/src/client.rs#L108-L111
Though if you tried to use the Rust CLI with `--model gpt-4.1`, this
would fail with:
```shell
"Unsupported parameter: 'reasoning.effort' is not supported with this model."
```
We take a cue from the TypeScript CLI, which does a check on the model
name:
https://github.com/openai/codex/blob/d7245cbbc9d8ff5446da45e5951761103492476d/codex-cli/src/utils/agent/agent-loop.ts#L786-L789
This PR does a similar check, though also adds support for the following
config options:
```
model_reasoning_effort = "low" | "medium" | "high" | "none"
model_reasoning_summary = "auto" | "concise" | "detailed" | "none"
```
This way, if you have a model whose name happens to start with `"o"` (or
`"codex"`?), you can set these to `"none"` to explicitly disable
reasoning, if necessary. (That said, it seems unlikely anyone would use
the Responses API with non-OpenAI models, but we provide an escape
hatch, anyway.)
This PR also updates both the TUI and `codex exec` to show `reasoning
effort` and `reasoning summaries` in the header.
2025-06-02 16:01:34 -07:00
|
|
|
model_reasoning_summary: ReasoningSummary::default(),
|
2025-08-22 17:12:10 +01:00
|
|
|
model_verbosity: None,
|
2025-07-11 13:30:11 -04:00
|
|
|
chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
|
2025-07-22 09:42:22 -07:00
|
|
|
base_instructions: None,
|
2025-10-20 08:50:54 -07:00
|
|
|
forced_chatgpt_workspace_id: None,
|
|
|
|
|
forced_login_method: None,
|
2025-08-15 11:55:53 -04:00
|
|
|
include_apply_patch_tool: false,
|
2025-08-23 22:58:56 -07:00
|
|
|
tools_web_search_request: false,
|
2025-10-24 17:23:44 -05:00
|
|
|
experimental_sandbox_command_assessment: false,
|
2025-08-22 18:10:55 -07:00
|
|
|
use_experimental_streamable_shell_tool: false,
|
2025-09-11 09:19:12 -07:00
|
|
|
use_experimental_unified_exec_tool: false,
|
2025-09-26 10:13:37 -07:00
|
|
|
use_experimental_use_rmcp_client: false,
|
2025-08-27 17:41:23 -07:00
|
|
|
include_view_image_tool: true,
|
2025-10-14 18:50:00 +01:00
|
|
|
features: Features::with_defaults(),
|
2025-09-10 13:53:46 -07:00
|
|
|
active_profile: Some("gpt3".to_string()),
|
2025-10-16 11:23:38 -07:00
|
|
|
active_project: ProjectConfig { trust_level: None },
|
2025-10-04 17:41:40 -07:00
|
|
|
windows_wsl_setup_acknowledged: false,
|
2025-10-16 17:31:46 -07:00
|
|
|
notices: Default::default(),
|
2025-08-28 12:54:12 -07:00
|
|
|
disable_paste_burst: false,
|
2025-09-15 10:22:02 -07:00
|
|
|
tui_notifications: Default::default(),
|
OpenTelemetry events (#2103)
### Title
## otel
Codex can emit [OpenTelemetry](https://opentelemetry.io/) **log events**
that
describe each run: outbound API requests, streamed responses, user
input,
tool-approval decisions, and the result of every tool invocation. Export
is
**disabled by default** so local runs remain self-contained. Opt in by
adding an
`[otel]` table and choosing an exporter.
```toml
[otel]
environment = "staging" # defaults to "dev"
exporter = "none" # defaults to "none"; set to otlp-http or otlp-grpc to send events
log_user_prompt = false # defaults to false; redact prompt text unless explicitly enabled
```
Codex tags every exported event with `service.name = "codex-cli"`, the
CLI
version, and an `env` attribute so downstream collectors can distinguish
dev/staging/prod traffic. Only telemetry produced inside the
`codex_otel`
crate—the events listed below—is forwarded to the exporter.
### Event catalog
Every event shares a common set of metadata fields: `event.timestamp`,
`conversation.id`, `app.version`, `auth_mode` (when available),
`user.account_id` (when available), `terminal.type`, `model`, and
`slug`.
With OTEL enabled Codex emits the following event types (in addition to
the
metadata above):
- `codex.api_request`
- `cf_ray` (optional)
- `attempt`
- `duration_ms`
- `http.response.status_code` (optional)
- `error.message` (failures)
- `codex.sse_event`
- `event.kind`
- `duration_ms`
- `error.message` (failures)
- `input_token_count` (completion only)
- `output_token_count` (completion only)
- `cached_token_count` (completion only, optional)
- `reasoning_token_count` (completion only, optional)
- `tool_token_count` (completion only)
- `codex.user_prompt`
- `prompt_length`
- `prompt` (redacted unless `log_user_prompt = true`)
- `codex.tool_decision`
- `tool_name`
- `call_id`
- `decision` (`approved`, `approved_for_session`, `denied`, or `abort`)
- `source` (`config` or `user`)
- `codex.tool_result`
- `tool_name`
- `call_id`
- `arguments`
- `duration_ms` (execution time for the tool)
- `success` (`"true"` or `"false"`)
- `output`
### Choosing an exporter
Set `otel.exporter` to control where events go:
- `none` – leaves instrumentation active but skips exporting. This is
the
default.
- `otlp-http` – posts OTLP log records to an OTLP/HTTP collector.
Specify the
endpoint, protocol, and headers your collector expects:
```toml
[otel]
exporter = { otlp-http = {
endpoint = "https://otel.example.com/v1/logs",
protocol = "binary",
headers = { "x-otlp-api-key" = "${OTLP_TOKEN}" }
}}
```
- `otlp-grpc` – streams OTLP log records over gRPC. Provide the endpoint
and any
metadata headers:
```toml
[otel]
exporter = { otlp-grpc = {
endpoint = "https://otel.example.com:4317",
headers = { "x-otlp-meta" = "abc123" }
}}
```
If the exporter is `none` nothing is written anywhere; otherwise you
must run or point to your
own collector. All exporters run on a background batch worker that is
flushed on
shutdown.
If you build Codex from source the OTEL crate is still behind an `otel`
feature
flag; the official prebuilt binaries ship with the feature enabled. When
the
feature is disabled the telemetry hooks become no-ops so the CLI
continues to
function without the extra dependencies.
---------
Co-authored-by: Anton Panasenko <apanasenko@openai.com>
2025-09-29 19:30:55 +01:00
|
|
|
otel: OtelConfig::default(),
|
2025-05-13 16:52:52 -07:00
|
|
|
};
|
2025-05-15 00:30:13 -07:00
|
|
|
|
|
|
|
|
assert_eq!(expected_gpt3_profile_config, gpt3_profile_config);
|
2025-05-13 16:52:52 -07:00
|
|
|
|
|
|
|
|
// Verify that loading without specifying a profile in ConfigOverrides
|
2025-05-15 00:30:13 -07:00
|
|
|
// uses the default profile from the config file (which is "gpt3").
|
2025-05-13 16:52:52 -07:00
|
|
|
let default_profile_overrides = ConfigOverrides {
|
2025-05-15 00:30:13 -07:00
|
|
|
cwd: Some(fixture.cwd()),
|
2025-05-13 16:52:52 -07:00
|
|
|
..Default::default()
|
|
|
|
|
};
|
2025-05-15 00:30:13 -07:00
|
|
|
|
2025-05-13 16:52:52 -07:00
|
|
|
let default_profile_config = Config::load_from_base_config_with_overrides(
|
2025-05-15 00:30:13 -07:00
|
|
|
fixture.cfg.clone(),
|
2025-05-13 16:52:52 -07:00
|
|
|
default_profile_overrides,
|
2025-05-15 00:30:13 -07:00
|
|
|
fixture.codex_home(),
|
2025-05-13 16:52:52 -07:00
|
|
|
)?;
|
2025-05-15 00:30:13 -07:00
|
|
|
|
2025-05-13 16:52:52 -07:00
|
|
|
assert_eq!(expected_gpt3_profile_config, default_profile_config);
|
2025-05-15 00:30:13 -07:00
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
|
fn test_precedence_fixture_with_zdr_profile() -> std::io::Result<()> {
|
|
|
|
|
let fixture = create_test_fixture()?;
|
2025-05-13 16:52:52 -07:00
|
|
|
|
|
|
|
|
let zdr_profile_overrides = ConfigOverrides {
|
|
|
|
|
config_profile: Some("zdr".to_string()),
|
2025-05-15 00:30:13 -07:00
|
|
|
cwd: Some(fixture.cwd()),
|
2025-05-13 16:52:52 -07:00
|
|
|
..Default::default()
|
|
|
|
|
};
|
2025-05-15 00:30:13 -07:00
|
|
|
let zdr_profile_config = Config::load_from_base_config_with_overrides(
|
|
|
|
|
fixture.cfg.clone(),
|
|
|
|
|
zdr_profile_overrides,
|
|
|
|
|
fixture.codex_home(),
|
|
|
|
|
)?;
|
|
|
|
|
let expected_zdr_profile_config = Config {
|
|
|
|
|
model: "o3".to_string(),
|
2025-09-16 13:36:51 -07:00
|
|
|
review_model: OPENAI_DEFAULT_REVIEW_MODEL.to_string(),
|
2025-08-04 23:50:03 -07:00
|
|
|
model_family: find_family_for_model("o3").expect("known model slug"),
|
feat: show number of tokens remaining in UI (#1388)
When using the OpenAI Responses API, we now record the `usage` field for
a `"response.completed"` event, which includes metrics about the number
of tokens consumed. We also introduce `openai_model_info.rs`, which
includes current data about the most common OpenAI models available via
the API (specifically `context_window` and `max_output_tokens`). If
Codex does not recognize the model, you can set `model_context_window`
and `model_max_output_tokens` explicitly in `config.toml`.
When then introduce a new event type to `protocol.rs`, `TokenCount`,
which includes the `TokenUsage` for the most recent turn.
Finally, we update the TUI to record the running sum of tokens used so
the percentage of available context window remaining can be reported via
the placeholder text for the composer:

We could certainly get much fancier with this (such as reporting the
estimated cost of the conversation), but for now, we are just trying to
achieve feature parity with the TypeScript CLI.
Though arguably this improves upon the TypeScript CLI, as the TypeScript
CLI uses heuristics to estimate the number of tokens used rather than
using the `usage` information directly:
https://github.com/openai/codex/blob/296996d74e345b1b05d8c3451a06ace21c5ada96/codex-cli/src/utils/approximate-tokens-used.ts#L3-L16
Fixes https://github.com/openai/codex/issues/1242
2025-06-25 23:31:11 -07:00
|
|
|
model_context_window: Some(200_000),
|
|
|
|
|
model_max_output_tokens: Some(100_000),
|
2025-10-20 11:29:49 -07:00
|
|
|
model_auto_compact_token_limit: Some(180_000),
|
2025-05-15 00:30:13 -07:00
|
|
|
model_provider_id: "openai".to_string(),
|
|
|
|
|
model_provider: fixture.openai_provider.clone(),
|
|
|
|
|
approval_policy: AskForApproval::OnFailure,
|
|
|
|
|
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
2025-10-16 11:23:38 -07:00
|
|
|
did_user_set_custom_approval_policy_or_sandbox_mode: true,
|
2025-10-27 18:19:32 -07:00
|
|
|
forced_auto_mode_downgraded_on_windows: false,
|
feat: introduce support for shell_environment_policy in config.toml (#1061)
To date, when handling `shell` and `local_shell` tool calls, we were
spawning new processes using the environment inherited from the Codex
process itself. This means that the sensitive `OPENAI_API_KEY` that
Codex needs to talk to OpenAI models was made available to everything
run by `shell` and `local_shell`. While there are cases where that might
be useful, it does not seem like a good default.
This PR introduces a complex `shell_environment_policy` config option to
control the `env` used with these tool calls. It is inevitably a bit
complex so that it is possible to override individual components of the
policy so without having to restate the entire thing.
Details are in the updated `README.md` in this PR, but here is the
relevant bit that explains the individual fields of
`shell_environment_policy`:
| Field | Type | Default | Description |
| ------------------------- | -------------------------- | ------- |
-----------------------------------------------------------------------------------------------------------------------------------------------
|
| `inherit` | string | `core` | Starting template for the
environment:<br>`core` (`HOME`, `PATH`, `USER`, …), `all` (clone full
parent env), or `none` (start empty). |
| `ignore_default_excludes` | boolean | `false` | When `false`, Codex
removes any var whose **name** contains `KEY`, `SECRET`, or `TOKEN`
(case-insensitive) before other rules run. |
| `exclude` | array<string> | `[]` | Case-insensitive glob
patterns to drop after the default filter.<br>Examples: `"AWS_*"`,
`"AZURE_*"`. |
| `set` | table<string,string> | `{}` | Explicit key/value
overrides or additions – always win over inherited values. |
| `include_only` | array<string> | `[]` | If non-empty, a
whitelist of patterns; only variables that match _one_ pattern survive
the final step. (Generally used with `inherit = "all"`.) |
In particular, note that the default is `inherit = "core"`, so:
* if you have extra env variables that you want to inherit from the
parent process, use `inherit = "all"` and then specify `include_only`
* if you have extra env variables where you want to hardcode the values,
the default `inherit = "core"` will work fine, but then you need to
specify `set`
This configuration is not battle-tested, so we will probably still have
to play with it a bit. `core/src/exec_env.rs` has the critical business
logic as well as unit tests.
Though if nothing else, previous to this change:
```
$ cargo run --bin codex -- debug seatbelt -- printenv OPENAI_API_KEY
# ...prints OPENAI_API_KEY...
```
But after this change it does not print anything (as desired).
One final thing to call out about this PR is that the
`configure_command!` macro we use in `core/src/exec.rs` has to do some
complex logic with respect to how it builds up the `env` for the process
being spawned under Landlock/seccomp. Specifically, doing
`cmd.env_clear()` followed by `cmd.envs(&$env_map)` (which is arguably
the most intuitive way to do it) caused the Landlock unit tests to fail
because the processes spawned by the unit tests started failing in
unexpected ways! If we forgo `env_clear()` in favor of updating env vars
one at a time, the tests still pass. The comment in the code talks about
this a bit, and while I would like to investigate this more, I need to
move on for the moment, but I do plan to come back to it to fully
understand what is going on. For example, this suggests that we might
not be able to spawn a C program that calls `env_clear()`, which would
be...weird. We may still have to fiddle with our Landlock config if that
is the case.
2025-05-22 09:51:19 -07:00
|
|
|
shell_environment_policy: ShellEnvironmentPolicy::default(),
|
2025-07-22 09:42:22 -07:00
|
|
|
user_instructions: None,
|
2025-05-15 00:30:13 -07:00
|
|
|
notify: None,
|
|
|
|
|
cwd: fixture.cwd(),
|
2025-10-27 19:41:49 -07:00
|
|
|
cli_auth_credentials_store_mode: Default::default(),
|
2025-05-15 00:30:13 -07:00
|
|
|
mcp_servers: HashMap::new(),
|
2025-10-07 19:39:32 -07:00
|
|
|
mcp_oauth_credentials_store_mode: Default::default(),
|
2025-05-15 00:30:13 -07:00
|
|
|
model_providers: fixture.model_provider_map.clone(),
|
|
|
|
|
project_doc_max_bytes: PROJECT_DOC_MAX_BYTES,
|
2025-10-01 11:19:59 -07:00
|
|
|
project_doc_fallback_filenames: Vec::new(),
|
2025-05-15 00:30:13 -07:00
|
|
|
codex_home: fixture.codex_home(),
|
feat: record messages from user in ~/.codex/history.jsonl (#939)
This is a large change to support a "history" feature like you would
expect in a shell like Bash.
History events are recorded in `$CODEX_HOME/history.jsonl`. Because it
is a JSONL file, it is straightforward to append new entries (as opposed
to the TypeScript file that uses `$CODEX_HOME/history.json`, so to be
valid JSON, each new entry entails rewriting the entire file). Because
it is possible for there to be multiple instances of Codex CLI writing
to `history.jsonl` at once, we use advisory file locking when working
with `history.jsonl` in `codex-rs/core/src/message_history.rs`.
Because we believe history is a sufficiently useful feature, we enable
it by default. Though to provide some safety, we set the file
permissions of `history.jsonl` to be `o600` so that other users on the
system cannot read the user's history. We do not yet support a default
list of `SENSITIVE_PATTERNS` as the TypeScript CLI does:
https://github.com/openai/codex/blob/3fdf9df1335ac9501e3fb0e61715359145711e8b/codex-cli/src/utils/storage/command-history.ts#L10-L17
We are going to take a more conservative approach to this list in the
Rust CLI. For example, while `/\b[A-Za-z0-9-_]{20,}\b/` might exclude
sensitive information like API tokens, it would also exclude valuable
information such as references to Git commits.
As noted in the updated documentation, users can opt-out of history by
adding the following to `config.toml`:
```toml
[history]
persistence = "none"
```
Because `history.jsonl` could, in theory, be quite large, we take a[n
arguably overly pedantic] approach in reading history entries into
memory. Specifically, we start by telling the client the current number
of entries in the history file (`history_entry_count`) as well as the
inode (`history_log_id`) of `history.jsonl` (see the new fields on
`SessionConfiguredEvent`).
The client is responsible for keeping new entries in memory to create a
"local history," but if the user hits up enough times to go "past" the
end of local history, then the client should use the new
`GetHistoryEntryRequest` in the protocol to fetch older entries.
Specifically, it should pass the `history_log_id` it was given
originally and work backwards from `history_entry_count`. (It should
really fetch history in batches rather than one-at-a-time, but that is
something we can improve upon in subsequent PRs.)
The motivation behind this crazy scheme is that it is designed to defend
against:
* The `history.jsonl` being truncated during the session such that the
index into the history is no longer consistent with what had been read
up to that point. We do not yet have logic to enforce a `max_bytes` for
`history.jsonl`, but once we do, we will aspire to implement it in a way
that should result in a new inode for the file on most systems.
* New items from concurrent Codex CLI sessions amending to the history.
Because, in absence of truncation, `history.jsonl` is an append-only
log, so long as the client reads backwards from `history_entry_count`,
it should always get a consistent view of history. (That said, it will
not be able to read _new_ commands from concurrent sessions, but perhaps
we will introduce a `/` command to reload latest history or something
down the road.)
Admittedly, my testing of this feature thus far has been fairly light. I
expect we will find bugs and introduce enhancements/fixes going forward.
2025-05-15 16:26:23 -07:00
|
|
|
history: History::default(),
|
2025-05-16 11:33:08 -07:00
|
|
|
file_opener: UriBasedFileOpener::VsCode,
|
2025-05-22 21:52:28 -07:00
|
|
|
codex_linux_sandbox_exe: None,
|
2025-05-30 23:14:56 -07:00
|
|
|
hide_agent_reasoning: false,
|
2025-08-05 01:56:13 -07:00
|
|
|
show_raw_agent_reasoning: false,
|
2025-09-12 12:06:33 -07:00
|
|
|
model_reasoning_effort: None,
|
feat: make reasoning effort/summaries configurable (#1199)
Previous to this PR, we always set `reasoning` when making a request
using the Responses API:
https://github.com/openai/codex/blob/d7245cbbc9d8ff5446da45e5951761103492476d/codex-rs/core/src/client.rs#L108-L111
Though if you tried to use the Rust CLI with `--model gpt-4.1`, this
would fail with:
```shell
"Unsupported parameter: 'reasoning.effort' is not supported with this model."
```
We take a cue from the TypeScript CLI, which does a check on the model
name:
https://github.com/openai/codex/blob/d7245cbbc9d8ff5446da45e5951761103492476d/codex-cli/src/utils/agent/agent-loop.ts#L786-L789
This PR does a similar check, though also adds support for the following
config options:
```
model_reasoning_effort = "low" | "medium" | "high" | "none"
model_reasoning_summary = "auto" | "concise" | "detailed" | "none"
```
This way, if you have a model whose name happens to start with `"o"` (or
`"codex"`?), you can set these to `"none"` to explicitly disable
reasoning, if necessary. (That said, it seems unlikely anyone would use
the Responses API with non-OpenAI models, but we provide an escape
hatch, anyway.)
This PR also updates both the TUI and `codex exec` to show `reasoning
effort` and `reasoning summaries` in the header.
2025-06-02 16:01:34 -07:00
|
|
|
model_reasoning_summary: ReasoningSummary::default(),
|
2025-08-22 17:12:10 +01:00
|
|
|
model_verbosity: None,
|
2025-07-11 13:30:11 -04:00
|
|
|
chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
|
2025-07-22 09:42:22 -07:00
|
|
|
base_instructions: None,
|
2025-10-20 08:50:54 -07:00
|
|
|
forced_chatgpt_workspace_id: None,
|
|
|
|
|
forced_login_method: None,
|
2025-08-15 11:55:53 -04:00
|
|
|
include_apply_patch_tool: false,
|
2025-08-23 22:58:56 -07:00
|
|
|
tools_web_search_request: false,
|
2025-10-24 17:23:44 -05:00
|
|
|
experimental_sandbox_command_assessment: false,
|
2025-08-22 18:10:55 -07:00
|
|
|
use_experimental_streamable_shell_tool: false,
|
2025-09-11 09:19:12 -07:00
|
|
|
use_experimental_unified_exec_tool: false,
|
2025-09-26 10:13:37 -07:00
|
|
|
use_experimental_use_rmcp_client: false,
|
2025-08-27 17:41:23 -07:00
|
|
|
include_view_image_tool: true,
|
2025-10-14 18:50:00 +01:00
|
|
|
features: Features::with_defaults(),
|
2025-09-10 13:53:46 -07:00
|
|
|
active_profile: Some("zdr".to_string()),
|
2025-10-16 11:23:38 -07:00
|
|
|
active_project: ProjectConfig { trust_level: None },
|
2025-10-04 17:41:40 -07:00
|
|
|
windows_wsl_setup_acknowledged: false,
|
2025-10-16 17:31:46 -07:00
|
|
|
notices: Default::default(),
|
2025-08-28 12:54:12 -07:00
|
|
|
disable_paste_burst: false,
|
2025-09-15 10:22:02 -07:00
|
|
|
tui_notifications: Default::default(),
|
OpenTelemetry events (#2103)
### Title
## otel
Codex can emit [OpenTelemetry](https://opentelemetry.io/) **log events**
that
describe each run: outbound API requests, streamed responses, user
input,
tool-approval decisions, and the result of every tool invocation. Export
is
**disabled by default** so local runs remain self-contained. Opt in by
adding an
`[otel]` table and choosing an exporter.
```toml
[otel]
environment = "staging" # defaults to "dev"
exporter = "none" # defaults to "none"; set to otlp-http or otlp-grpc to send events
log_user_prompt = false # defaults to false; redact prompt text unless explicitly enabled
```
Codex tags every exported event with `service.name = "codex-cli"`, the
CLI
version, and an `env` attribute so downstream collectors can distinguish
dev/staging/prod traffic. Only telemetry produced inside the
`codex_otel`
crate—the events listed below—is forwarded to the exporter.
### Event catalog
Every event shares a common set of metadata fields: `event.timestamp`,
`conversation.id`, `app.version`, `auth_mode` (when available),
`user.account_id` (when available), `terminal.type`, `model`, and
`slug`.
With OTEL enabled Codex emits the following event types (in addition to
the
metadata above):
- `codex.api_request`
- `cf_ray` (optional)
- `attempt`
- `duration_ms`
- `http.response.status_code` (optional)
- `error.message` (failures)
- `codex.sse_event`
- `event.kind`
- `duration_ms`
- `error.message` (failures)
- `input_token_count` (completion only)
- `output_token_count` (completion only)
- `cached_token_count` (completion only, optional)
- `reasoning_token_count` (completion only, optional)
- `tool_token_count` (completion only)
- `codex.user_prompt`
- `prompt_length`
- `prompt` (redacted unless `log_user_prompt = true`)
- `codex.tool_decision`
- `tool_name`
- `call_id`
- `decision` (`approved`, `approved_for_session`, `denied`, or `abort`)
- `source` (`config` or `user`)
- `codex.tool_result`
- `tool_name`
- `call_id`
- `arguments`
- `duration_ms` (execution time for the tool)
- `success` (`"true"` or `"false"`)
- `output`
### Choosing an exporter
Set `otel.exporter` to control where events go:
- `none` – leaves instrumentation active but skips exporting. This is
the
default.
- `otlp-http` – posts OTLP log records to an OTLP/HTTP collector.
Specify the
endpoint, protocol, and headers your collector expects:
```toml
[otel]
exporter = { otlp-http = {
endpoint = "https://otel.example.com/v1/logs",
protocol = "binary",
headers = { "x-otlp-api-key" = "${OTLP_TOKEN}" }
}}
```
- `otlp-grpc` – streams OTLP log records over gRPC. Provide the endpoint
and any
metadata headers:
```toml
[otel]
exporter = { otlp-grpc = {
endpoint = "https://otel.example.com:4317",
headers = { "x-otlp-meta" = "abc123" }
}}
```
If the exporter is `none` nothing is written anywhere; otherwise you
must run or point to your
own collector. All exporters run on a background batch worker that is
flushed on
shutdown.
If you build Codex from source the OTEL crate is still behind an `otel`
feature
flag; the official prebuilt binaries ship with the feature enabled. When
the
feature is disabled the telemetry hooks become no-ops so the CLI
continues to
function without the extra dependencies.
---------
Co-authored-by: Anton Panasenko <apanasenko@openai.com>
2025-09-29 19:30:55 +01:00
|
|
|
otel: OtelConfig::default(),
|
2025-05-15 00:30:13 -07:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
assert_eq!(expected_zdr_profile_config, zdr_profile_config);
|
2025-05-13 16:52:52 -07:00
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
2025-08-21 13:20:36 -07:00
|
|
|
|
2025-09-03 12:20:31 -07:00
|
|
|
#[test]
fn test_precedence_fixture_with_gpt5_profile() -> std::io::Result<()> {
    let fixture = create_test_fixture()?;

    // Select the `gpt5` profile from the fixture's config and resolve the
    // effective configuration for the fixture's working directory.
    let gpt5_profile_overrides = ConfigOverrides {
        config_profile: Some("gpt5".to_string()),
        cwd: Some(fixture.cwd()),
        ..Default::default()
    };
    let gpt5_profile_config = Config::load_from_base_config_with_overrides(
        fixture.cfg.clone(),
        gpt5_profile_overrides,
        fixture.codex_home(),
    )?;

    // Full expected snapshot: profile-driven fields (model, reasoning,
    // verbosity, approval/sandbox) plus defaults for everything else.
    let expected_gpt5_profile_config = Config {
        model: "gpt-5".to_string(),
        review_model: OPENAI_DEFAULT_REVIEW_MODEL.to_string(),
        model_family: find_family_for_model("gpt-5").expect("known model slug"),
        model_context_window: Some(272_000),
        model_max_output_tokens: Some(128_000),
        model_auto_compact_token_limit: Some(244_800),
        model_provider_id: "openai".to_string(),
        model_provider: fixture.openai_provider.clone(),
        approval_policy: AskForApproval::OnFailure,
        sandbox_policy: SandboxPolicy::new_read_only_policy(),
        did_user_set_custom_approval_policy_or_sandbox_mode: true,
        forced_auto_mode_downgraded_on_windows: false,
        shell_environment_policy: ShellEnvironmentPolicy::default(),
        user_instructions: None,
        notify: None,
        cwd: fixture.cwd(),
        cli_auth_credentials_store_mode: Default::default(),
        mcp_servers: HashMap::new(),
        mcp_oauth_credentials_store_mode: Default::default(),
        model_providers: fixture.model_provider_map.clone(),
        project_doc_max_bytes: PROJECT_DOC_MAX_BYTES,
        project_doc_fallback_filenames: Vec::new(),
        codex_home: fixture.codex_home(),
        history: History::default(),
        file_opener: UriBasedFileOpener::VsCode,
        codex_linux_sandbox_exe: None,
        hide_agent_reasoning: false,
        show_raw_agent_reasoning: false,
        model_reasoning_effort: Some(ReasoningEffort::High),
        model_reasoning_summary: ReasoningSummary::Detailed,
        model_verbosity: Some(Verbosity::High),
        chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
        base_instructions: None,
        forced_chatgpt_workspace_id: None,
        forced_login_method: None,
        include_apply_patch_tool: false,
        tools_web_search_request: false,
        experimental_sandbox_command_assessment: false,
        use_experimental_streamable_shell_tool: false,
        use_experimental_unified_exec_tool: false,
        use_experimental_use_rmcp_client: false,
        include_view_image_tool: true,
        features: Features::with_defaults(),
        active_profile: Some("gpt5".to_string()),
        active_project: ProjectConfig { trust_level: None },
        windows_wsl_setup_acknowledged: false,
        notices: Default::default(),
        disable_paste_burst: false,
        tui_notifications: Default::default(),
        otel: OtelConfig::default(),
    };

    assert_eq!(expected_gpt5_profile_config, gpt5_profile_config);

    Ok(())
}
|
|
|
|
|
|
2025-10-16 11:23:38 -07:00
|
|
|
#[test]
fn test_did_user_set_custom_approval_policy_or_sandbox_mode_defaults_no() -> anyhow::Result<()>
{
    let fixture = create_test_fixture()?;

    // Load the fixture config with no CLI overrides at all.
    let config = Config::load_from_base_config_with_overrides(
        fixture.cfg.clone(),
        ConfigOverrides {
            ..Default::default()
        },
        fixture.codex_home(),
    )?;

    // NOTE(review): the test name says "defaults_no" but the assertion expects
    // `true` — presumably the fixture's config.toml sets an approval policy or
    // sandbox mode, which counts as user-set. Confirm the name matches intent.
    assert!(config.did_user_set_custom_approval_policy_or_sandbox_mode);

    Ok(())
}
|
|
|
|
|
|
2025-08-21 13:20:36 -07:00
|
|
|
#[test]
fn test_set_project_trusted_writes_explicit_tables() -> anyhow::Result<()> {
    let project_dir = Path::new("/some/path");

    // Start from an empty document and mark the project as trusted.
    let mut doc = DocumentMut::new();
    set_project_trusted_inner(&mut doc, project_dir)?;
    let contents = doc.to_string();

    // Windows paths contain backslashes, so they serialize as TOML literal
    // (single-quoted) keys; other paths use basic (double-quoted) keys.
    let raw_path = project_dir.to_string_lossy();
    let path_str = if raw_path.contains('\\') {
        format!("'{raw_path}'")
    } else {
        format!("\"{raw_path}\"")
    };

    // The entry must be written as an explicit `[projects.<path>]` table.
    let expected = format!(
        r#"[projects.{path_str}]
trust_level = "trusted"
"#
    );
    assert_eq!(contents, expected);

    Ok(())
}
|
|
|
|
|
|
|
|
|
|
#[test]
fn test_set_project_trusted_converts_inline_to_explicit() -> anyhow::Result<()> {
    let project_dir = Path::new("/some/path");

    // Seed config.toml with an inline project entry under [projects].
    // Use a quoted key so backslashes don't require escaping on Windows.
    let raw_path = project_dir.to_string_lossy();
    let path_str = if raw_path.contains('\\') {
        format!("'{raw_path}'")
    } else {
        format!("\"{raw_path}\"")
    };
    let initial = format!(
        r#"[projects]
{path_str} = {{ trust_level = "untrusted" }}
"#
    );
    let mut doc = initial.parse::<DocumentMut>()?;

    // Run the function; it should convert to explicit tables and set trusted.
    set_project_trusted_inner(&mut doc, project_dir)?;
    let contents = doc.to_string();

    // Assert exact output after conversion to an explicit table.
    let expected = format!(
        r#"[projects]

[projects.{path_str}]
trust_level = "trusted"
"#
    );
    assert_eq!(contents, expected);

    Ok(())
}
|
2025-09-10 16:01:31 -07:00
|
|
|
|
|
|
|
|
#[test]
fn test_set_project_trusted_migrates_top_level_inline_projects_preserving_entries()
-> anyhow::Result<()> {
    // Start from a config with a top-level inline `projects` table holding
    // two existing entries, surrounded by unrelated keys.
    let initial = r#"toplevel = "baz"
projects = { "/Users/mbolin/code/codex4" = { trust_level = "trusted", foo = "bar" } , "/Users/mbolin/code/codex3" = { trust_level = "trusted" } }
model = "foo""#;
    let mut doc = initial.parse::<DocumentMut>()?;

    // Approve a new directory
    let new_project = Path::new("/Users/mbolin/code/codex2");
    set_project_trusted_inner(&mut doc, new_project)?;

    let contents = doc.to_string();

    // Since we created the [projects] table as part of migration, it is kept implicit.
    // Expect explicit per-project tables, preserving prior entries (including
    // extra keys like `foo`) and appending the newly trusted one at the end.
    let expected = r#"toplevel = "baz"
model = "foo"

[projects."/Users/mbolin/code/codex4"]
trust_level = "trusted"
foo = "bar"

[projects."/Users/mbolin/code/codex3"]
trust_level = "trusted"

[projects."/Users/mbolin/code/codex2"]
trust_level = "trusted"
"#;
    assert_eq!(contents, expected);

    Ok(())
}
|
2025-04-29 18:42:52 -07:00
|
|
|
}
|
2025-09-15 10:22:02 -07:00
|
|
|
|
|
|
|
|
#[cfg(test)]
mod notifications_tests {
    use crate::config::types::Notifications;
    use assert_matches::assert_matches;
    use serde::Deserialize;

    // Minimal mirror of the `[tui]` table so these tests exercise
    // `Notifications` deserialization without the full `Tui` config type.
    #[derive(Deserialize, Debug, PartialEq)]
    struct TuiTomlTest {
        notifications: Notifications,
    }

    // Minimal root document containing only the `[tui]` table.
    #[derive(Deserialize, Debug, PartialEq)]
    struct RootTomlTest {
        tui: TuiTomlTest,
    }

    /// `notifications = true` parses as the boolean "enabled" form.
    #[test]
    fn test_tui_notifications_true() {
        let toml = r#"
[tui]
notifications = true
"#;
        let parsed: RootTomlTest = toml::from_str(toml).expect("deserialize notifications=true");
        assert_matches!(parsed.tui.notifications, Notifications::Enabled(true));
    }

    /// A string array parses as the custom notification-filter form.
    #[test]
    fn test_tui_notifications_custom_array() {
        let toml = r#"
[tui]
notifications = ["foo"]
"#;
        let parsed: RootTomlTest =
            toml::from_str(toml).expect("deserialize notifications=[\"foo\"]");
        assert_matches!(
            parsed.tui.notifications,
            Notifications::Custom(ref v) if v == &vec!["foo".to_string()]
        );
    }
}
|