2025-10-03 13:02:26 -07:00
|
|
|
use crate::config_loader::LoadedConfigLayers;
|
|
|
|
|
pub use crate::config_loader::load_config_as_toml;
|
|
|
|
|
use crate::config_loader::load_config_layers_with_overrides;
|
|
|
|
|
use crate::config_loader::merge_toml_values;
|
2025-05-13 16:52:52 -07:00
|
|
|
use crate::config_profile::ConfigProfile;
|
OpenTelemetry events (#2103)
### Title
## otel
Codex can emit [OpenTelemetry](https://opentelemetry.io/) **log events**
that
describe each run: outbound API requests, streamed responses, user
input,
tool-approval decisions, and the result of every tool invocation. Export
is
**disabled by default** so local runs remain self-contained. Opt in by
adding an
`[otel]` table and choosing an exporter.
```toml
[otel]
environment = "staging" # defaults to "dev"
exporter = "none" # defaults to "none"; set to otlp-http or otlp-grpc to send events
log_user_prompt = false # defaults to false; redact prompt text unless explicitly enabled
```
Codex tags every exported event with `service.name = "codex-cli"`, the
CLI
version, and an `env` attribute so downstream collectors can distinguish
dev/staging/prod traffic. Only telemetry produced inside the
`codex_otel`
crate—the events listed below—is forwarded to the exporter.
### Event catalog
Every event shares a common set of metadata fields: `event.timestamp`,
`conversation.id`, `app.version`, `auth_mode` (when available),
`user.account_id` (when available), `terminal.type`, `model`, and
`slug`.
With OTEL enabled Codex emits the following event types (in addition to
the
metadata above):
- `codex.api_request`
- `cf_ray` (optional)
- `attempt`
- `duration_ms`
- `http.response.status_code` (optional)
- `error.message` (failures)
- `codex.sse_event`
- `event.kind`
- `duration_ms`
- `error.message` (failures)
- `input_token_count` (completion only)
- `output_token_count` (completion only)
- `cached_token_count` (completion only, optional)
- `reasoning_token_count` (completion only, optional)
- `tool_token_count` (completion only)
- `codex.user_prompt`
- `prompt_length`
- `prompt` (redacted unless `log_user_prompt = true`)
- `codex.tool_decision`
- `tool_name`
- `call_id`
- `decision` (`approved`, `approved_for_session`, `denied`, or `abort`)
- `source` (`config` or `user`)
- `codex.tool_result`
- `tool_name`
- `call_id`
- `arguments`
- `duration_ms` (execution time for the tool)
- `success` (`"true"` or `"false"`)
- `output`
### Choosing an exporter
Set `otel.exporter` to control where events go:
- `none` – leaves instrumentation active but skips exporting. This is
the
default.
- `otlp-http` – posts OTLP log records to an OTLP/HTTP collector.
Specify the
endpoint, protocol, and headers your collector expects:
```toml
[otel]
exporter = { otlp-http = {
endpoint = "https://otel.example.com/v1/logs",
protocol = "binary",
headers = { "x-otlp-api-key" = "${OTLP_TOKEN}" }
}}
```
- `otlp-grpc` – streams OTLP log records over gRPC. Provide the endpoint
and any
metadata headers:
```toml
[otel]
exporter = { otlp-grpc = {
endpoint = "https://otel.example.com:4317",
headers = { "x-otlp-meta" = "abc123" }
}}
```
If the exporter is `none` nothing is written anywhere; otherwise you
must run or point to your
own collector. All exporters run on a background batch worker that is
flushed on
shutdown.
If you build Codex from source the OTEL crate is still behind an `otel`
feature
flag; the official prebuilt binaries ship with the feature enabled. When
the
feature is disabled the telemetry hooks become no-ops so the CLI
continues to
function without the extra dependencies.
---------
Co-authored-by: Anton Panasenko <apanasenko@openai.com>
2025-09-29 19:30:55 +01:00
|
|
|
use crate::config_types::DEFAULT_OTEL_ENVIRONMENT;
|
2025-05-20 11:55:25 -07:00
|
|
|
use crate::config_types::History;
|
|
|
|
|
use crate::config_types::McpServerConfig;
|
2025-09-26 18:24:01 -07:00
|
|
|
use crate::config_types::McpServerTransportConfig;
|
2025-09-15 10:22:02 -07:00
|
|
|
use crate::config_types::Notifications;
|
OpenTelemetry events (#2103)
### Title
## otel
Codex can emit [OpenTelemetry](https://opentelemetry.io/) **log events**
that
describe each run: outbound API requests, streamed responses, user
input,
tool-approval decisions, and the result of every tool invocation. Export
is
**disabled by default** so local runs remain self-contained. Opt in by
adding an
`[otel]` table and choosing an exporter.
```toml
[otel]
environment = "staging" # defaults to "dev"
exporter = "none" # defaults to "none"; set to otlp-http or otlp-grpc to send events
log_user_prompt = false # defaults to false; redact prompt text unless explicitly enabled
```
Codex tags every exported event with `service.name = "codex-cli"`, the
CLI
version, and an `env` attribute so downstream collectors can distinguish
dev/staging/prod traffic. Only telemetry produced inside the
`codex_otel`
crate—the events listed below—is forwarded to the exporter.
### Event catalog
Every event shares a common set of metadata fields: `event.timestamp`,
`conversation.id`, `app.version`, `auth_mode` (when available),
`user.account_id` (when available), `terminal.type`, `model`, and
`slug`.
With OTEL enabled Codex emits the following event types (in addition to
the
metadata above):
- `codex.api_request`
- `cf_ray` (optional)
- `attempt`
- `duration_ms`
- `http.response.status_code` (optional)
- `error.message` (failures)
- `codex.sse_event`
- `event.kind`
- `duration_ms`
- `error.message` (failures)
- `input_token_count` (completion only)
- `output_token_count` (completion only)
- `cached_token_count` (completion only, optional)
- `reasoning_token_count` (completion only, optional)
- `tool_token_count` (completion only)
- `codex.user_prompt`
- `prompt_length`
- `prompt` (redacted unless `log_user_prompt = true`)
- `codex.tool_decision`
- `tool_name`
- `call_id`
- `decision` (`approved`, `approved_for_session`, `denied`, or `abort`)
- `source` (`config` or `user`)
- `codex.tool_result`
- `tool_name`
- `call_id`
- `arguments`
- `duration_ms` (execution time for the tool)
- `success` (`"true"` or `"false"`)
- `output`
### Choosing an exporter
Set `otel.exporter` to control where events go:
- `none` – leaves instrumentation active but skips exporting. This is
the
default.
- `otlp-http` – posts OTLP log records to an OTLP/HTTP collector.
Specify the
endpoint, protocol, and headers your collector expects:
```toml
[otel]
exporter = { otlp-http = {
endpoint = "https://otel.example.com/v1/logs",
protocol = "binary",
headers = { "x-otlp-api-key" = "${OTLP_TOKEN}" }
}}
```
- `otlp-grpc` – streams OTLP log records over gRPC. Provide the endpoint
and any
metadata headers:
```toml
[otel]
exporter = { otlp-grpc = {
endpoint = "https://otel.example.com:4317",
headers = { "x-otlp-meta" = "abc123" }
}}
```
If the exporter is `none` nothing is written anywhere; otherwise you
must run or point to your
own collector. All exporters run on a background batch worker that is
flushed on
shutdown.
If you build Codex from source the OTEL crate is still behind an `otel`
feature
flag; the official prebuilt binaries ship with the feature enabled. When
the
feature is disabled the telemetry hooks become no-ops so the CLI
continues to
function without the extra dependencies.
---------
Co-authored-by: Anton Panasenko <apanasenko@openai.com>
2025-09-29 19:30:55 +01:00
|
|
|
use crate::config_types::OtelConfig;
|
|
|
|
|
use crate::config_types::OtelConfigToml;
|
|
|
|
|
use crate::config_types::OtelExporterKind;
|
2025-09-04 11:00:01 -07:00
|
|
|
use crate::config_types::ReasoningSummaryFormat;
|
2025-08-07 01:30:13 -07:00
|
|
|
use crate::config_types::SandboxWorkspaceWrite;
|
feat: introduce support for shell_environment_policy in config.toml (#1061)
To date, when handling `shell` and `local_shell` tool calls, we were
spawning new processes using the environment inherited from the Codex
process itself. This means that the sensitive `OPENAI_API_KEY` that
Codex needs to talk to OpenAI models was made available to everything
run by `shell` and `local_shell`. While there are cases where that might
be useful, it does not seem like a good default.
This PR introduces a complex `shell_environment_policy` config option to
control the `env` used with these tool calls. It is inevitably a bit
complex so that it is possible to override individual components of the
policy without having to restate the entire thing.
Details are in the updated `README.md` in this PR, but here is the
relevant bit that explains the individual fields of
`shell_environment_policy`:
| Field | Type | Default | Description |
| ------------------------- | -------------------------- | ------- |
-----------------------------------------------------------------------------------------------------------------------------------------------
|
| `inherit` | string | `core` | Starting template for the
environment:<br>`core` (`HOME`, `PATH`, `USER`, …), `all` (clone full
parent env), or `none` (start empty). |
| `ignore_default_excludes` | boolean | `false` | When `false`, Codex
removes any var whose **name** contains `KEY`, `SECRET`, or `TOKEN`
(case-insensitive) before other rules run. |
| `exclude` | array<string> | `[]` | Case-insensitive glob
patterns to drop after the default filter.<br>Examples: `"AWS_*"`,
`"AZURE_*"`. |
| `set` | table<string,string> | `{}` | Explicit key/value
overrides or additions – always win over inherited values. |
| `include_only` | array<string> | `[]` | If non-empty, a
whitelist of patterns; only variables that match _one_ pattern survive
the final step. (Generally used with `inherit = "all"`.) |
In particular, note that the default is `inherit = "core"`, so:
* if you have extra env variables that you want to inherit from the
parent process, use `inherit = "all"` and then specify `include_only`
* if you have extra env variables where you want to hardcode the values,
the default `inherit = "core"` will work fine, but then you need to
specify `set`
This configuration is not battle-tested, so we will probably still have
to play with it a bit. `core/src/exec_env.rs` has the critical business
logic as well as unit tests.
Though if nothing else, previous to this change:
```
$ cargo run --bin codex -- debug seatbelt -- printenv OPENAI_API_KEY
# ...prints OPENAI_API_KEY...
```
But after this change it does not print anything (as desired).
One final thing to call out about this PR is that the
`configure_command!` macro we use in `core/src/exec.rs` has to do some
complex logic with respect to how it builds up the `env` for the process
being spawned under Landlock/seccomp. Specifically, doing
`cmd.env_clear()` followed by `cmd.envs(&$env_map)` (which is arguably
the most intuitive way to do it) caused the Landlock unit tests to fail
because the processes spawned by the unit tests started failing in
unexpected ways! If we forgo `env_clear()` in favor of updating env vars
one at a time, the tests still pass. The comment in the code talks about
this a bit, and while I would like to investigate this more, I need to
move on for the moment, but I do plan to come back to it to fully
understand what is going on. For example, this suggests that we might
not be able to spawn a C program that calls `env_clear()`, which would
be...weird. We may still have to fiddle with our Landlock config if that
is the case.
2025-05-22 09:51:19 -07:00
|
|
|
use crate::config_types::ShellEnvironmentPolicy;
|
|
|
|
|
use crate::config_types::ShellEnvironmentPolicyToml;
|
2025-05-20 11:55:25 -07:00
|
|
|
use crate::config_types::Tui;
|
|
|
|
|
use crate::config_types::UriBasedFileOpener;
|
2025-08-22 13:54:51 -07:00
|
|
|
use crate::git_info::resolve_root_git_project_for_trust;
|
2025-08-04 23:50:03 -07:00
|
|
|
use crate::model_family::ModelFamily;
|
2025-09-14 15:45:15 -07:00
|
|
|
use crate::model_family::derive_default_model_family;
|
2025-08-04 23:50:03 -07:00
|
|
|
use crate::model_family::find_family_for_model;
|
2025-05-07 17:38:28 -07:00
|
|
|
use crate::model_provider_info::ModelProviderInfo;
|
|
|
|
|
use crate::model_provider_info::built_in_model_providers;
|
feat: show number of tokens remaining in UI (#1388)
When using the OpenAI Responses API, we now record the `usage` field for
a `"response.completed"` event, which includes metrics about the number
of tokens consumed. We also introduce `openai_model_info.rs`, which
includes current data about the most common OpenAI models available via
the API (specifically `context_window` and `max_output_tokens`). If
Codex does not recognize the model, you can set `model_context_window`
and `model_max_output_tokens` explicitly in `config.toml`.
We then introduce a new event type to `protocol.rs`, `TokenCount`,
which includes the `TokenUsage` for the most recent turn.
Finally, we update the TUI to record the running sum of tokens used so
the percentage of available context window remaining can be reported via
the placeholder text for the composer:

We could certainly get much fancier with this (such as reporting the
estimated cost of the conversation), but for now, we are just trying to
achieve feature parity with the TypeScript CLI.
Though arguably this improves upon the TypeScript CLI, as the TypeScript
CLI uses heuristics to estimate the number of tokens used rather than
using the `usage` information directly:
https://github.com/openai/codex/blob/296996d74e345b1b05d8c3451a06ace21c5ada96/codex-cli/src/utils/approximate-tokens-used.ts#L3-L16
Fixes https://github.com/openai/codex/issues/1242
2025-06-25 23:31:11 -07:00
|
|
|
use crate::openai_model_info::get_model_info;
|
2025-04-27 21:47:50 -07:00
|
|
|
use crate::protocol::AskForApproval;
|
|
|
|
|
use crate::protocol::SandboxPolicy;
|
2025-09-11 15:04:29 -07:00
|
|
|
use anyhow::Context;
|
fix: remove mcp-types from app server protocol (#4537)
We continue the separation between `codex app-server` and `codex
mcp-server`.
In particular, we introduce a new crate, `codex-app-server-protocol`,
and migrate `codex-rs/protocol/src/mcp_protocol.rs` into it, renaming it
`codex-rs/app-server-protocol/src/protocol.rs`.
Because `ConversationId` was defined in `mcp_protocol.rs`, we move it
into its own file, `codex-rs/protocol/src/conversation_id.rs`, and
because it is referenced in a ton of places, we have to touch a lot of
files as part of this PR.
We also decide to get away from proper JSON-RPC 2.0 semantics, so we
also introduce `codex-rs/app-server-protocol/src/jsonrpc_lite.rs`, which
is basically the same `JSONRPCMessage` type defined in `mcp-types`
except with all of the `"jsonrpc": "2.0"` removed.
Getting rid of `"jsonrpc": "2.0"` makes our serialization logic
considerably simpler, as we can lean heavier on serde to serialize
directly into the wire format that we use now.
2025-09-30 19:16:26 -07:00
|
|
|
use codex_app_server_protocol::Tools;
|
|
|
|
|
use codex_app_server_protocol::UserSavedConfig;
|
2025-08-18 11:50:17 -07:00
|
|
|
use codex_protocol::config_types::ReasoningEffort;
|
|
|
|
|
use codex_protocol::config_types::ReasoningSummary;
|
2025-08-18 09:36:57 -07:00
|
|
|
use codex_protocol::config_types::SandboxMode;
|
2025-09-03 12:20:31 -07:00
|
|
|
use codex_protocol::config_types::Verbosity;
|
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
As stated in `codex-rs/README.md`:
Today, Codex CLI is written in TypeScript and requires Node.js 22+ to
run it. For a number of users, this runtime requirement inhibits
adoption: they would be better served by a standalone executable. As
maintainers, we want Codex to run efficiently in a wide range of
environments with minimal overhead. We also want to take advantage of
operating system-specific APIs to provide better sandboxing, where
possible.
To that end, we are moving forward with a Rust implementation of Codex
CLI contained in this folder, which has the following benefits:
- The CLI compiles to small, standalone, platform-specific binaries.
- Can make direct, native calls to
[seccomp](https://man7.org/linux/man-pages/man2/seccomp.2.html) and
[landlock](https://man7.org/linux/man-pages/man7/landlock.7.html) in
order to support sandboxing on Linux.
- No runtime garbage collection, resulting in lower memory consumption
and better, more predictable performance.
Currently, the Rust implementation is materially behind the TypeScript
implementation in functionality, so continue to use the TypeScript
implementation for the time being. We will publish native executables via
GitHub Releases as soon as we feel the Rust version is usable.
2025-04-24 13:31:40 -07:00
|
|
|
use dirs::home_dir;
|
|
|
|
|
use serde::Deserialize;
|
2025-09-14 21:30:56 -07:00
|
|
|
use std::collections::BTreeMap;
|
feat: support mcp_servers in config.toml (#829)
This adds initial support for MCP servers in the style of Claude Desktop
and Cursor. Note this PR is the bare minimum to get things working end
to end: all configured MCP servers are launched every time Codex is run,
there is no recovery for MCP servers that crash, etc.
(Also, I took some shortcuts to change some fields of `Session` to be
`pub(crate)`, which also means there are circular deps between
`codex.rs` and `mcp_tool_call.rs`, but I will clean that up in a
subsequent PR.)
`codex-rs/README.md` is updated as part of this PR to explain how to use
this feature. There is a bit of plumbing to route the new settings from
`Config` to the business logic in `codex.rs`. The most significant
chunks for new code are in `mcp_connection_manager.rs` (which defines
the `McpConnectionManager` struct) and `mcp_tool_call.rs`, which is
responsible for tool calls.
This PR also introduces new `McpToolCallBegin` and `McpToolCallEnd`
event types to the protocol, but does not add any handlers for them.
(See https://github.com/openai/codex/pull/836 for initial usage.)
To test, I added the following to my `~/.codex/config.toml`:
```toml
# Local build of https://github.com/hideya/mcp-server-weather-js
[mcp_servers.weather]
command = "/Users/mbolin/code/mcp-server-weather-js/dist/index.js"
args = []
```
And then I ran the following:
```
codex-rs$ cargo run --bin codex exec 'what is the weather in san francisco'
[2025-05-06T22:40:05] Task started: 1
[2025-05-06T22:40:18] Agent message: Here’s the latest National Weather Service forecast for San Francisco (downtown, near 37.77° N, 122.42° W):
This Afternoon (Tue):
• Sunny, high near 69 °F
• West-southwest wind around 12 mph
Tonight:
• Partly cloudy, low around 52 °F
• SW wind 7–10 mph
...
```
Note that Codex itself is not able to make network calls, so it would
not normally be able to get live weather information like this. However,
the weather MCP is [currently] not run under the Codex sandbox, so it is
able to hit `api.weather.gov` and fetch current weather information.
---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed
with [ReviewStack](https://reviewstack.dev/openai/codex/pull/829).
* #836
* __->__ #829
2025-05-06 15:47:59 -07:00
|
|
|
use std::collections::HashMap;
|
2025-05-13 16:52:52 -07:00
|
|
|
use std::path::Path;
|
2025-04-27 21:47:50 -07:00
|
|
|
use std::path::PathBuf;
|
2025-08-07 09:27:38 -07:00
|
|
|
use tempfile::NamedTempFile;
|
feat: add support for -c/--config to override individual config items (#1137)
This PR introduces support for `-c`/`--config` so users can override
individual config values on the command line using `--config
name=value`. Example:
```
codex --config model=o4-mini
```
Making it possible to set arbitrary config values on the command line
results in a more flexible configuration scheme and makes it easier to
provide single-line examples that can be copy-pasted from documentation.
Effectively, it means there are four levels of configuration for some
values:
- Default value (e.g., `model` currently defaults to `o4-mini`)
- Value in `config.toml` (e.g., user could override the default to be
`model = "o3"` in their `config.toml`)
- Specifying `-c` or `--config` to override `model` (e.g., user can
include `-c model=o3` in their list of args to Codex)
- If available, a config-specific flag can be used, which takes
precedence over `-c` (e.g., user can specify `--model o3` in their list
of args to Codex)
Now that it is possible to specify anything that could be configured in
`config.toml` on the command line using `-c`, we do not need to have a
custom flag for every possible config option (which can clutter the
output of `--help`). To that end, as part of this PR, we drop support
for the `--disable-response-storage` flag, as users can now specify `-c
disable_response_storage=true` to get the equivalent functionality.
Under the hood, this works by loading the `config.toml` into a
`toml::Value`. Then for each `key=value`, we create a small synthetic
TOML file with `value` so that we can run the TOML parser to get the
equivalent `toml::Value`. We then parse `key` to determine the point in
the original `toml::Value` to do the insert/replace. Once all of the
overrides from `-c` args have been applied, the `toml::Value` is
deserialized into a `ConfigToml` and then the `ConfigOverrides` are
applied, as before.
2025-05-27 23:11:44 -07:00
|
|
|
use toml::Value as TomlValue;
|
2025-09-14 21:30:56 -07:00
|
|
|
use toml_edit::Array as TomlArray;
|
2025-08-07 09:27:38 -07:00
|
|
|
use toml_edit::DocumentMut;
|
2025-09-14 21:30:56 -07:00
|
|
|
use toml_edit::Item as TomlItem;
|
|
|
|
|
use toml_edit::Table as TomlTable;
|
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
As stated in `codex-rs/README.md`:
Today, Codex CLI is written in TypeScript and requires Node.js 22+ to
run it. For a number of users, this runtime requirement inhibits
adoption: they would be better served by a standalone executable. As
maintainers, we want Codex to run efficiently in a wide range of
environments with minimal overhead. We also want to take advantage of
operating system-specific APIs to provide better sandboxing, where
possible.
To that end, we are moving forward with a Rust implementation of Codex
CLI contained in this folder, which has the following benefits:
- The CLI compiles to small, standalone, platform-specific binaries.
- Can make direct, native calls to
[seccomp](https://man7.org/linux/man-pages/man2/seccomp.2.html) and
[landlock](https://man7.org/linux/man-pages/man7/landlock.7.html) in
order to support sandboxing on Linux.
- No runtime garbage collection, resulting in lower memory consumption
and better, more predictable performance.
Currently, the Rust implementation is materially behind the TypeScript
implementation in functionality, so continue to use the TypeScript
implementation for the time being. We will publish native executables via
GitHub Releases as soon as we feel the Rust version is usable.
2025-04-24 13:31:40 -07:00
|
|
|
|
2025-10-03 14:00:03 -07:00
|
|
|
/// Default model slug on Windows builds: `gpt-5`. Non-Windows builds use
/// `gpt-5-codex` instead (see the companion
/// `#[cfg(not(target_os = "windows"))]` declaration below).
#[cfg(target_os = "windows")]
|
|
|
|
|
pub const OPENAI_DEFAULT_MODEL: &str = "gpt-5";
|
|
|
|
|
/// Default model slug on non-Windows builds: `gpt-5-codex`. Windows builds
/// fall back to `gpt-5` (see the companion `#[cfg(target_os = "windows")]`
/// declaration above).
#[cfg(not(target_os = "windows"))]
|
|
|
|
|
pub const OPENAI_DEFAULT_MODEL: &str = "gpt-5-codex";
|
2025-09-16 13:36:51 -07:00
|
|
|
const OPENAI_DEFAULT_REVIEW_MODEL: &str = "gpt-5-codex";
|
2025-09-15 08:17:13 -07:00
|
|
|
pub const GPT_5_CODEX_MEDIUM_MODEL: &str = "gpt-5-codex";
|
2025-08-07 10:13:13 -07:00
|
|
|
|
2025-05-10 17:52:59 -07:00
|
|
|
/// Maximum number of bytes of the documentation that will be embedded. Larger
|
|
|
|
|
/// files are *silently truncated* to this size so we do not take up too much of
|
|
|
|
|
/// the context window.
|
|
|
|
|
pub(crate) const PROJECT_DOC_MAX_BYTES: usize = 32 * 1024; // 32 KiB
|
|
|
|
|
|
2025-09-10 13:53:46 -07:00
|
|
|
pub(crate) const CONFIG_TOML_FILE: &str = "config.toml";
|
2025-08-07 09:27:38 -07:00
|
|
|
|
2025-04-27 21:47:50 -07:00
|
|
|
/// Application configuration loaded from disk and merged with overrides.
|
2025-05-13 16:52:52 -07:00
|
|
|
#[derive(Debug, Clone, PartialEq)]
|
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
As stated in `codex-rs/README.md`:
Today, Codex CLI is written in TypeScript and requires Node.js 22+ to
run it. For a number of users, this runtime requirement inhibits
adoption: they would be better served by a standalone executable. As
maintainers, we want Codex to run efficiently in a wide range of
environments with minimal overhead. We also want to take advantage of
operating system-specific APIs to provide better sandboxing, where
possible.
To that end, we are moving forward with a Rust implementation of Codex
CLI contained in this folder, which has the following benefits:
- The CLI compiles to small, standalone, platform-specific binaries.
- Can make direct, native calls to
[seccomp](https://man7.org/linux/man-pages/man2/seccomp.2.html) and
[landlock](https://man7.org/linux/man-pages/man7/landlock.7.html) in
order to support sandboxing on Linux.
- No runtime garbage collection, resulting in lower memory consumption
and better, more predictable performance.
Currently, the Rust implementation is materially behind the TypeScript
implementation in functionality, so continue to use the TypeScript
implementation for the time being. We will publish native executables via
GitHub Releases as soon as we feel the Rust version is usable.
2025-04-24 13:31:40 -07:00
|
|
|
pub struct Config {
|
2025-04-27 21:47:50 -07:00
|
|
|
/// Optional override of model selection.
|
|
|
|
|
pub model: String,
|
fix: overhaul SandboxPolicy and config loading in Rust (#732)
Previous to this PR, `SandboxPolicy` was a bit difficult to work with:
https://github.com/openai/codex/blob/237f8a11e11fdcc793a09e787e48215676d9b95b/codex-rs/core/src/protocol.rs#L98-L108
Specifically:
* It was an `enum` and therefore options were mutually exclusive as
opposed to additive.
* It defined things in terms of what the agent _could not_ do as opposed
to what they _could_ do. This made things hard to support because we
would prefer to build up a sandbox config by starting with something
extremely restrictive and only granting permissions for things the user
has explicitly allowed.
This PR changes things substantially by redefining the policy in terms
of two concepts:
* A `SandboxPermission` enum that defines permissions that can be
granted to the agent/sandbox.
* A `SandboxPolicy` that internally stores a `Vec<SandboxPermission>`,
but externally exposes a simpler API that can be used to configure
Seatbelt/Landlock.
Previous to this PR, we supported a `--sandbox` flag that effectively
mapped to an enum value in `SandboxPolicy`. Though now that
`SandboxPolicy` is a wrapper around `Vec<SandboxPermission>`, the single
`--sandbox` flag no longer makes sense. While I could have turned it
into a flag that the user can specify multiple times, I think the
current values to use with such a flag are long and potentially messy,
so for the moment, I have dropped support for `--sandbox` altogether and
we can bring it back once we have figured out the naming thing.
Since `--sandbox` is gone, users now have to specify `--full-auto` to
get a sandbox that allows writes in `cwd`. Admittedly, there is no clean
way to specify the equivalent of `--full-auto` in your `config.toml`
right now, so we will have to revisit that, as well.
Because `Config` presents a `SandboxPolicy` field and `SandboxPolicy`
changed considerably, I had to overhaul how config loading works, as
well. There are now two distinct concepts, `ConfigToml` and `Config`:
* `ConfigToml` is the deserialization of `~/.codex/config.toml`. As one
might expect, every field is `Optional` and it is `#[derive(Deserialize,
Default)]`. Consistent use of `Optional` makes it clear what the user
has specified explicitly.
* `Config` is the "normalized config" and is produced by merging
`ConfigToml` with `ConfigOverrides`. Where `ConfigToml` contains a raw
`Option<Vec<SandboxPermission>>`, `Config` presents only the final
`SandboxPolicy`.
The changes to `core/src/exec.rs` and `core/src/linux.rs` merit extra
special attention to ensure we are faithfully mapping the
`SandboxPolicy` to the Seatbelt and Landlock configs, respectively.
Also, take note that `core/src/seatbelt_readonly_policy.sbpl` has been
renamed to `codex-rs/core/src/seatbelt_base_policy.sbpl` and that
`(allow file-read*)` has been removed from the `.sbpl` file as now this
is added to the policy in `core/src/exec.rs` when
`sandbox_policy.has_full_disk_read_access()` is `true`.
2025-04-29 15:01:16 -07:00
|
|
|
|
2025-09-22 20:10:52 -07:00
|
|
|
/// Model used specifically for review sessions. Defaults to "gpt-5-codex".
|
Review Mode (Core) (#3401)
## 📝 Review Mode -- Core
This PR introduces the Core implementation for Review mode:
- New op `Op::Review { prompt: String }:` spawns a child review task
with isolated context, a review‑specific system prompt, and a
`Config.review_model`.
- `EnteredReviewMode`: emitted when the child review session starts.
Every event from this point onwards reflects the review session.
- `ExitedReviewMode(Option<ReviewOutputEvent>)`: emitted when the review
finishes or is interrupted, with optional structured findings:
```json
{
"findings": [
{
"title": "<≤ 80 chars, imperative>",
"body": "<valid Markdown explaining *why* this is a problem; cite files/lines/functions>",
"confidence_score": <float 0.0-1.0>,
"priority": <int 0-3>,
"code_location": {
"absolute_file_path": "<file path>",
"line_range": {"start": <int>, "end": <int>}
}
}
],
"overall_correctness": "patch is correct" | "patch is incorrect",
"overall_explanation": "<1-3 sentence explanation justifying the overall_correctness verdict>",
"overall_confidence_score": <float 0.0-1.0>
}
```
## Questions
### Why separate out its own message history?
We want the review thread to match the training of our review models as
much as possible -- that means using a custom prompt, removing user
instructions, and starting a clean chat history.
We also want to make sure the review thread doesn't leak into the parent
thread.
### Why do this as a mode, vs. sub-agents?
1. We want review to be a synchronous task, so it's fine for now to do a
bespoke implementation.
2. We're still unclear about the final structure for sub-agents. We'd
prefer to land this quickly and then refactor into sub-agents without
rushing that implementation.
2025-09-12 16:25:10 -07:00
|
|
|
pub review_model: String,
|
|
|
|
|
|
2025-08-04 23:50:03 -07:00
|
|
|
pub model_family: ModelFamily,
|
|
|
|
|
|
feat: show number of tokens remaining in UI (#1388)
When using the OpenAI Responses API, we now record the `usage` field for
a `"response.completed"` event, which includes metrics about the number
of tokens consumed. We also introduce `openai_model_info.rs`, which
includes current data about the most common OpenAI models available via
the API (specifically `context_window` and `max_output_tokens`). If
Codex does not recognize the model, you can set `model_context_window`
and `model_max_output_tokens` explicitly in `config.toml`.
We then introduce a new event type to `protocol.rs`, `TokenCount`,
which includes the `TokenUsage` for the most recent turn.
Finally, we update the TUI to record the running sum of tokens used so
the percentage of available context window remaining can be reported via
the placeholder text for the composer:

We could certainly get much fancier with this (such as reporting the
estimated cost of the conversation), but for now, we are just trying to
achieve feature parity with the TypeScript CLI.
Though arguably this improves upon the TypeScript CLI, as the TypeScript
CLI uses heuristics to estimate the number of tokens used rather than
using the `usage` information directly:
https://github.com/openai/codex/blob/296996d74e345b1b05d8c3451a06ace21c5ada96/codex-cli/src/utils/approximate-tokens-used.ts#L3-L16
Fixes https://github.com/openai/codex/issues/1242
2025-06-25 23:31:11 -07:00
|
|
|
/// Size of the context window for the model, in tokens.
|
|
|
|
|
pub model_context_window: Option<u64>,
|
|
|
|
|
|
|
|
|
|
/// Maximum number of output tokens.
|
|
|
|
|
pub model_max_output_tokens: Option<u64>,
|
|
|
|
|
|
2025-09-12 13:07:10 -07:00
|
|
|
/// Token usage threshold triggering auto-compaction of conversation history.
|
|
|
|
|
pub model_auto_compact_token_limit: Option<i64>,
|
|
|
|
|
|
2025-05-08 21:46:06 -07:00
|
|
|
/// Key into the model_providers map that specifies which provider to use.
|
|
|
|
|
pub model_provider_id: String,
|
|
|
|
|
|
2025-05-07 17:38:28 -07:00
|
|
|
/// Info needed to make an API request to the model.
|
|
|
|
|
pub model_provider: ModelProviderInfo,
|
|
|
|
|
|
fix: overhaul SandboxPolicy and config loading in Rust (#732)
Previous to this PR, `SandboxPolicy` was a bit difficult to work with:
https://github.com/openai/codex/blob/237f8a11e11fdcc793a09e787e48215676d9b95b/codex-rs/core/src/protocol.rs#L98-L108
Specifically:
* It was an `enum` and therefore options were mutually exclusive as
opposed to additive.
* It defined things in terms of what the agent _could not_ do as opposed
to what they _could_ do. This made things hard to support because we
would prefer to build up a sandbox config by starting with something
extremely restrictive and only granting permissions for things the user
has explicitly allowed.
This PR changes things substantially by redefining the policy in terms
of two concepts:
* A `SandboxPermission` enum that defines permissions that can be
granted to the agent/sandbox.
* A `SandboxPolicy` that internally stores a `Vec<SandboxPermission>`,
but externally exposes a simpler API that can be used to configure
Seatbelt/Landlock.
Previous to this PR, we supported a `--sandbox` flag that effectively
mapped to an enum value in `SandboxPolicy`. Though now that
`SandboxPolicy` is a wrapper around `Vec<SandboxPermission>`, the single
`--sandbox` flag no longer makes sense. While I could have turned it
into a flag that the user can specify multiple times, I think the
current values to use with such a flag are long and potentially messy,
so for the moment, I have dropped support for `--sandbox` altogether and
we can bring it back once we have figured out the naming thing.
Since `--sandbox` is gone, users now have to specify `--full-auto` to
get a sandbox that allows writes in `cwd`. Admittedly, there is no clean
way to specify the equivalent of `--full-auto` in your `config.toml`
right now, so we will have to revisit that, as well.
Because `Config` presents a `SandboxPolicy` field and `SandboxPolicy`
changed considerably, I had to overhaul how config loading works, as
well. There are now two distinct concepts, `ConfigToml` and `Config`:
* `ConfigToml` is the deserialization of `~/.codex/config.toml`. As one
might expect, every field is `Optional` and it is `#[derive(Deserialize,
Default)]`. Consistent use of `Optional` makes it clear what the user
has specified explicitly.
* `Config` is the "normalized config" and is produced by merging
`ConfigToml` with `ConfigOverrides`. Where `ConfigToml` contains a raw
`Option<Vec<SandboxPermission>>`, `Config` presents only the final
`SandboxPolicy`.
The changes to `core/src/exec.rs` and `core/src/linux.rs` merit extra
special attention to ensure we are faithfully mapping the
`SandboxPolicy` to the Seatbelt and Landlock configs, respectively.
Also, take note that `core/src/seatbelt_readonly_policy.sbpl` has been
renamed to `codex-rs/core/src/seatbelt_base_policy.sbpl` and that
`(allow file-read*)` has been removed from the `.sbpl` file as now this
is added to the policy in `core/src/exec.rs` when
`sandbox_policy.has_full_disk_read_access()` is `true`.
2025-04-29 15:01:16 -07:00
|
|
|
/// Approval policy for executing commands.
|
2025-04-27 21:47:50 -07:00
|
|
|
pub approval_policy: AskForApproval,
|
fix: overhaul SandboxPolicy and config loading in Rust (#732)
Previous to this PR, `SandboxPolicy` was a bit difficult to work with:
https://github.com/openai/codex/blob/237f8a11e11fdcc793a09e787e48215676d9b95b/codex-rs/core/src/protocol.rs#L98-L108
Specifically:
* It was an `enum` and therefore options were mutually exclusive as
opposed to additive.
* It defined things in terms of what the agent _could not_ do as opposed
to what they _could_ do. This made things hard to support because we
would prefer to build up a sandbox config by starting with something
extremely restrictive and only granting permissions for things the user
has explicitly allowed.
This PR changes things substantially by redefining the policy in terms
of two concepts:
* A `SandboxPermission` enum that defines permissions that can be
granted to the agent/sandbox.
* A `SandboxPolicy` that internally stores a `Vec<SandboxPermission>`,
but externally exposes a simpler API that can be used to configure
Seatbelt/Landlock.
Previous to this PR, we supported a `--sandbox` flag that effectively
mapped to an enum value in `SandboxPolicy`. Though now that
`SandboxPolicy` is a wrapper around `Vec<SandboxPermission>`, the single
`--sandbox` flag no longer makes sense. While I could have turned it
into a flag that the user can specify multiple times, I think the
current values to use with such a flag are long and potentially messy,
so for the moment, I have dropped support for `--sandbox` altogether and
we can bring it back once we have figured out the naming thing.
Since `--sandbox` is gone, users now have to specify `--full-auto` to
get a sandbox that allows writes in `cwd`. Admittedly, there is no clean
way to specify the equivalent of `--full-auto` in your `config.toml`
right now, so we will have to revisit that, as well.
Because `Config` presents a `SandboxPolicy` field and `SandboxPolicy`
changed considerably, I had to overhaul how config loading works, as
well. There are now two distinct concepts, `ConfigToml` and `Config`:
* `ConfigToml` is the deserialization of `~/.codex/config.toml`. As one
might expect, every field is `Optional` and it is `#[derive(Deserialize,
Default)]`. Consistent use of `Optional` makes it clear what the user
has specified explicitly.
* `Config` is the "normalized config" and is produced by merging
`ConfigToml` with `ConfigOverrides`. Where `ConfigToml` contains a raw
`Option<Vec<SandboxPermission>>`, `Config` presents only the final
`SandboxPolicy`.
The changes to `core/src/exec.rs` and `core/src/linux.rs` merit extra
special attention to ensure we are faithfully mapping the
`SandboxPolicy` to the Seatbelt and Landlock configs, respectively.
Also, take note that `core/src/seatbelt_readonly_policy.sbpl` has been
renamed to `codex-rs/core/src/seatbelt_base_policy.sbpl` and that
`(allow file-read*)` has been removed from the `.sbpl` file as now this
is added to the policy in `core/src/exec.rs` when
`sandbox_policy.has_full_disk_read_access()` is `true`.
2025-04-29 15:01:16 -07:00
|
|
|
|
2025-04-27 21:47:50 -07:00
|
|
|
pub sandbox_policy: SandboxPolicy,
|
2025-04-28 15:39:34 -07:00
|
|
|
|
feat: introduce support for shell_environment_policy in config.toml (#1061)
To date, when handling `shell` and `local_shell` tool calls, we were
spawning new processes using the environment inherited from the Codex
process itself. This means that the sensitive `OPENAI_API_KEY` that
Codex needs to talk to OpenAI models was made available to everything
run by `shell` and `local_shell`. While there are cases where that might
be useful, it does not seem like a good default.
This PR introduces a complex `shell_environment_policy` config option to
control the `env` used with these tool calls. It is inevitably a bit
complex so that it is possible to override individual components of the
policy without having to restate the entire thing.
Details are in the updated `README.md` in this PR, but here is the
relevant bit that explains the individual fields of
`shell_environment_policy`:
| Field | Type | Default | Description |
| ------------------------- | -------------------------- | ------- |
-----------------------------------------------------------------------------------------------------------------------------------------------
|
| `inherit` | string | `core` | Starting template for the
environment:<br>`core` (`HOME`, `PATH`, `USER`, …), `all` (clone full
parent env), or `none` (start empty). |
| `ignore_default_excludes` | boolean | `false` | When `false`, Codex
removes any var whose **name** contains `KEY`, `SECRET`, or `TOKEN`
(case-insensitive) before other rules run. |
| `exclude` | array<string> | `[]` | Case-insensitive glob
patterns to drop after the default filter.<br>Examples: `"AWS_*"`,
`"AZURE_*"`. |
| `set` | table<string,string> | `{}` | Explicit key/value
overrides or additions – always win over inherited values. |
| `include_only` | array<string> | `[]` | If non-empty, a
whitelist of patterns; only variables that match _one_ pattern survive
the final step. (Generally used with `inherit = "all"`.) |
In particular, note that the default is `inherit = "core"`, so:
* if you have extra env variables that you want to inherit from the
parent process, use `inherit = "all"` and then specify `include_only`
* if you have extra env variables where you want to hardcode the values,
the default `inherit = "core"` will work fine, but then you need to
specify `set`
This configuration is not battle-tested, so we will probably still have
to play with it a bit. `core/src/exec_env.rs` has the critical business
logic as well as unit tests.
Though if nothing else, previous to this change:
```
$ cargo run --bin codex -- debug seatbelt -- printenv OPENAI_API_KEY
# ...prints OPENAI_API_KEY...
```
But after this change it does not print anything (as desired).
One final thing to call out about this PR is that the
`configure_command!` macro we use in `core/src/exec.rs` has to do some
complex logic with respect to how it builds up the `env` for the process
being spawned under Landlock/seccomp. Specifically, doing
`cmd.env_clear()` followed by `cmd.envs(&$env_map)` (which is arguably
the most intuitive way to do it) caused the Landlock unit tests to fail
because the processes spawned by the unit tests started failing in
unexpected ways! If we forgo `env_clear()` in favor of updating env vars
one at a time, the tests still pass. The comment in the code talks about
this a bit, and while I would like to investigate this more, I need to
move on for the moment, but I do plan to come back to it to fully
understand what is going on. For example, this suggests that we might
not be able to spawn a C program that calls `env_clear()`, which would
be...weird. We may still have to fiddle with our Landlock config if that
is the case.
2025-05-22 09:51:19 -07:00
|
|
|
pub shell_environment_policy: ShellEnvironmentPolicy,
|
|
|
|
|
|
2025-05-30 23:14:56 -07:00
|
|
|
/// When `true`, `AgentReasoning` events emitted by the backend will be
|
|
|
|
|
/// suppressed from the frontend output. This can reduce visual noise when
|
|
|
|
|
/// users are only interested in the final agent responses.
|
|
|
|
|
pub hide_agent_reasoning: bool,
|
|
|
|
|
|
2025-08-05 01:56:13 -07:00
|
|
|
/// When set to `true`, `AgentReasoningRawContentEvent` events will be shown in the UI/output.
|
|
|
|
|
/// Defaults to `false`.
|
|
|
|
|
pub show_raw_agent_reasoning: bool,
|
|
|
|
|
|
2025-08-06 11:48:03 -07:00
|
|
|
/// User-provided instructions from AGENTS.md.
|
2025-07-22 09:42:22 -07:00
|
|
|
pub user_instructions: Option<String>,
|
|
|
|
|
|
|
|
|
|
/// Base instructions override.
|
|
|
|
|
pub base_instructions: Option<String>,
|
feat: configurable notifications in the Rust CLI (#793)
With this change, you can specify a program that will be executed to get
notified about events generated by Codex. The notification info will be
packaged as a JSON object. The supported notification types are defined
by the `UserNotification` enum introduced in this PR. Initially, it
contains only one variant, `AgentTurnComplete`:
```rust
pub(crate) enum UserNotification {
#[serde(rename_all = "kebab-case")]
AgentTurnComplete {
turn_id: String,
/// Messages that the user sent to the agent to initiate the turn.
input_messages: Vec<String>,
/// The last message sent by the assistant in the turn.
last_assistant_message: Option<String>,
},
}
```
This is intended to support the common case when a "turn" ends, which
often means it is now your chance to give Codex further instructions.
For example, I have the following in my `~/.codex/config.toml`:
```toml
notify = ["python3", "/Users/mbolin/.codex/notify.py"]
```
I created my own custom notifier script that calls out to
[terminal-notifier](https://github.com/julienXX/terminal-notifier) to
show a desktop push notification on macOS. Contents of `notify.py`:
```python
#!/usr/bin/env python3
import json
import subprocess
import sys
def main() -> int:
if len(sys.argv) != 2:
print("Usage: notify.py <NOTIFICATION_JSON>")
return 1
try:
notification = json.loads(sys.argv[1])
except json.JSONDecodeError:
return 1
match notification_type := notification.get("type"):
case "agent-turn-complete":
assistant_message = notification.get("last-assistant-message")
if assistant_message:
title = f"Codex: {assistant_message}"
else:
title = "Codex: Turn Complete!"
input_messages = notification.get("input_messages", [])
message = " ".join(input_messages)
title += message
case _:
print(f"not sending a push notification for: {notification_type}")
return 0
subprocess.check_output(
[
"terminal-notifier",
"-title",
title,
"-message",
message,
"-group",
"codex",
"-ignoreDnD",
"-activate",
"com.googlecode.iterm2",
]
)
return 0
if __name__ == "__main__":
sys.exit(main())
```
For reference, here are related PRs that tried to add this functionality
to the TypeScript version of the Codex CLI:
* https://github.com/openai/codex/pull/160
* https://github.com/openai/codex/pull/498
2025-05-02 19:48:13 -07:00
|
|
|
|
|
|
|
|
/// Optional external notifier command. When set, Codex will spawn this
|
|
|
|
|
/// program after each completed *turn* (i.e. when the agent finishes
|
|
|
|
|
/// processing a user submission). The value must be the full command
|
|
|
|
|
/// broken into argv tokens **without** the trailing JSON argument - Codex
|
|
|
|
|
/// appends one extra argument containing a JSON payload describing the
|
|
|
|
|
/// event.
|
|
|
|
|
///
|
|
|
|
|
/// Example `~/.codex/config.toml` snippet:
|
|
|
|
|
///
|
|
|
|
|
/// ```toml
|
|
|
|
|
/// notify = ["notify-send", "Codex"]
|
|
|
|
|
/// ```
|
|
|
|
|
///
|
|
|
|
|
/// which will be invoked as:
|
|
|
|
|
///
|
|
|
|
|
/// ```shell
|
|
|
|
|
/// notify-send Codex '{"type":"agent-turn-complete","turn-id":"12345"}'
|
|
|
|
|
/// ```
|
|
|
|
|
///
|
|
|
|
|
/// If unset the feature is disabled.
|
|
|
|
|
pub notify: Option<Vec<String>>,
|
2025-05-04 10:57:12 -07:00
|
|
|
|
2025-09-15 10:22:02 -07:00
|
|
|
/// TUI notifications preference. When set, the TUI will send OSC 9 notifications on approvals
|
|
|
|
|
/// and turn completions when not focused.
|
|
|
|
|
pub tui_notifications: Notifications,
|
|
|
|
|
|
2025-05-04 10:57:12 -07:00
|
|
|
/// The directory that should be treated as the current working directory
|
|
|
|
|
/// for the session. All relative paths inside the business-logic layer are
|
|
|
|
|
/// resolved against this path.
|
|
|
|
|
pub cwd: PathBuf,
|
feat: support mcp_servers in config.toml (#829)
This adds initial support for MCP servers in the style of Claude Desktop
and Cursor. Note this PR is the bare minimum to get things working end
to end: all configured MCP servers are launched every time Codex is run,
there is no recovery for MCP servers that crash, etc.
(Also, I took some shortcuts to change some fields of `Session` to be
`pub(crate)`, which also means there are circular deps between
`codex.rs` and `mcp_tool_call.rs`, but I will clean that up in a
subsequent PR.)
`codex-rs/README.md` is updated as part of this PR to explain how to use
this feature. There is a bit of plumbing to route the new settings from
`Config` to the business logic in `codex.rs`. The most significant
chunks for new code are in `mcp_connection_manager.rs` (which defines
the `McpConnectionManager` struct) and `mcp_tool_call.rs`, which is
responsible for tool calls.
This PR also introduces new `McpToolCallBegin` and `McpToolCallEnd`
event types to the protocol, but does not add any handlers for them.
(See https://github.com/openai/codex/pull/836 for initial usage.)
To test, I added the following to my `~/.codex/config.toml`:
```toml
# Local build of https://github.com/hideya/mcp-server-weather-js
[mcp_servers.weather]
command = "/Users/mbolin/code/mcp-server-weather-js/dist/index.js"
args = []
```
And then I ran the following:
```
codex-rs$ cargo run --bin codex exec 'what is the weather in san francisco'
[2025-05-06T22:40:05] Task started: 1
[2025-05-06T22:40:18] Agent message: Here’s the latest National Weather Service forecast for San Francisco (downtown, near 37.77° N, 122.42° W):
This Afternoon (Tue):
• Sunny, high near 69 °F
• West-southwest wind around 12 mph
Tonight:
• Partly cloudy, low around 52 °F
• SW wind 7–10 mph
...
```
Note that Codex itself is not able to make network calls, so it would
not normally be able to get live weather information like this. However,
the weather MCP is [currently] not run under the Codex sandbox, so it is
able to hit `api.weather.gov` and fetch current weather information.
---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed
with [ReviewStack](https://reviewstack.dev/openai/codex/pull/829).
* #836
* __->__ #829
2025-05-06 15:47:59 -07:00
|
|
|
|
|
|
|
|
/// Definition for MCP servers that Codex can reach out to for tool calls.
|
|
|
|
|
pub mcp_servers: HashMap<String, McpServerConfig>,
|
2025-05-07 17:38:28 -07:00
|
|
|
|
|
|
|
|
/// Combined provider map (defaults merged with user-defined overrides).
|
|
|
|
|
pub model_providers: HashMap<String, ModelProviderInfo>,
|
2025-05-10 17:52:59 -07:00
|
|
|
|
|
|
|
|
/// Maximum number of bytes to include from an AGENTS.md project doc file.
|
|
|
|
|
pub project_doc_max_bytes: usize,
|
2025-05-15 00:30:13 -07:00
|
|
|
|
2025-10-01 11:19:59 -07:00
|
|
|
/// Additional filenames to try when looking for project-level docs.
|
|
|
|
|
pub project_doc_fallback_filenames: Vec<String>,
|
|
|
|
|
|
2025-05-15 00:30:13 -07:00
|
|
|
/// Directory containing all Codex state (defaults to `~/.codex` but can be
|
|
|
|
|
/// overridden by the `CODEX_HOME` environment variable).
|
|
|
|
|
pub codex_home: PathBuf,
|
feat: record messages from user in ~/.codex/history.jsonl (#939)
This is a large change to support a "history" feature like you would
expect in a shell like Bash.
History events are recorded in `$CODEX_HOME/history.jsonl`. Because it
is a JSONL file, it is straightforward to append new entries (as opposed
to the TypeScript file that uses `$CODEX_HOME/history.json`, so to be
valid JSON, each new entry entails rewriting the entire file). Because
it is possible for there to be multiple instances of Codex CLI writing
to `history.jsonl` at once, we use advisory file locking when working
with `history.jsonl` in `codex-rs/core/src/message_history.rs`.
Because we believe history is a sufficiently useful feature, we enable
it by default. Though to provide some safety, we set the file
permissions of `history.jsonl` to be `o600` so that other users on the
system cannot read the user's history. We do not yet support a default
list of `SENSITIVE_PATTERNS` as the TypeScript CLI does:
https://github.com/openai/codex/blob/3fdf9df1335ac9501e3fb0e61715359145711e8b/codex-cli/src/utils/storage/command-history.ts#L10-L17
We are going to take a more conservative approach to this list in the
Rust CLI. For example, while `/\b[A-Za-z0-9-_]{20,}\b/` might exclude
sensitive information like API tokens, it would also exclude valuable
information such as references to Git commits.
As noted in the updated documentation, users can opt-out of history by
adding the following to `config.toml`:
```toml
[history]
persistence = "none"
```
Because `history.jsonl` could, in theory, be quite large, we take a[n
arguably overly pedantic] approach in reading history entries into
memory. Specifically, we start by telling the client the current number
of entries in the history file (`history_entry_count`) as well as the
inode (`history_log_id`) of `history.jsonl` (see the new fields on
`SessionConfiguredEvent`).
The client is responsible for keeping new entries in memory to create a
"local history," but if the user hits up enough times to go "past" the
end of local history, then the client should use the new
`GetHistoryEntryRequest` in the protocol to fetch older entries.
Specifically, it should pass the `history_log_id` it was given
originally and work backwards from `history_entry_count`. (It should
really fetch history in batches rather than one-at-a-time, but that is
something we can improve upon in subsequent PRs.)
The motivation behind this crazy scheme is that it is designed to defend
against:
* The `history.jsonl` being truncated during the session such that the
index into the history is no longer consistent with what had been read
up to that point. We do not yet have logic to enforce a `max_bytes` for
`history.jsonl`, but once we do, we will aspire to implement it in a way
that should result in a new inode for the file on most systems.
* New items from concurrent Codex CLI sessions amending to the history.
Because, in absence of truncation, `history.jsonl` is an append-only
log, so long as the client reads backwards from `history_entry_count`,
it should always get a consistent view of history. (That said, it will
not be able to read _new_ commands from concurrent sessions, but perhaps
we will introduce a `/` command to reload latest history or something
down the road.)
Admittedly, my testing of this feature thus far has been fairly light. I
expect we will find bugs and introduce enhancements/fixes going forward.
2025-05-15 16:26:23 -07:00
|
|
|
|
|
|
|
|
/// Settings that govern if and what will be written to `~/.codex/history.jsonl`.
|
|
|
|
|
pub history: History,
|
2025-05-16 11:33:08 -07:00
|
|
|
|
|
|
|
|
/// Optional URI-based file opener. If set, citations to files in the model
|
|
|
|
|
/// output will be hyperlinked using the specified URI scheme.
|
|
|
|
|
pub file_opener: UriBasedFileOpener,
|
2025-05-16 16:16:50 -07:00
|
|
|
|
2025-05-22 21:52:28 -07:00
|
|
|
/// Path to the `codex-linux-sandbox` executable. This must be set if
|
|
|
|
|
/// [`crate::exec::SandboxType::LinuxSeccomp`] is used. Note that this
|
|
|
|
|
/// cannot be set in the config file: it must be set in code via
|
|
|
|
|
/// [`ConfigOverrides`].
|
|
|
|
|
///
|
|
|
|
|
/// When this program is invoked, arg0 will be set to `codex-linux-sandbox`.
|
|
|
|
|
pub codex_linux_sandbox_exe: Option<PathBuf>,
|
feat: make reasoning effort/summaries configurable (#1199)
Previous to this PR, we always set `reasoning` when making a request
using the Responses API:
https://github.com/openai/codex/blob/d7245cbbc9d8ff5446da45e5951761103492476d/codex-rs/core/src/client.rs#L108-L111
Though if you tried to use the Rust CLI with `--model gpt-4.1`, this
would fail with:
```shell
"Unsupported parameter: 'reasoning.effort' is not supported with this model."
```
We take a cue from the TypeScript CLI, which does a check on the model
name:
https://github.com/openai/codex/blob/d7245cbbc9d8ff5446da45e5951761103492476d/codex-cli/src/utils/agent/agent-loop.ts#L786-L789
This PR does a similar check, though also adds support for the following
config options:
```
model_reasoning_effort = "low" | "medium" | "high" | "none"
model_reasoning_summary = "auto" | "concise" | "detailed" | "none"
```
This way, if you have a model whose name happens to start with `"o"` (or
`"codex"`?), you can set these to `"none"` to explicitly disable
reasoning, if necessary. (That said, it seems unlikely anyone would use
the Responses API with non-OpenAI models, but we provide an escape
hatch, anyway.)
This PR also updates both the TUI and `codex exec` to show `reasoning
effort` and `reasoning summaries` in the header.
2025-06-02 16:01:34 -07:00
|
|
|
|
2025-08-19 10:55:07 -07:00
|
|
|
/// Value to use for `reasoning.effort` when making a request using the
|
|
|
|
|
/// Responses API.
|
2025-09-12 12:06:33 -07:00
|
|
|
pub model_reasoning_effort: Option<ReasoningEffort>,
|
feat: make reasoning effort/summaries configurable (#1199)
Previous to this PR, we always set `reasoning` when making a request
using the Responses API:
https://github.com/openai/codex/blob/d7245cbbc9d8ff5446da45e5951761103492476d/codex-rs/core/src/client.rs#L108-L111
Though if you tried to use the Rust CLI with `--model gpt-4.1`, this
would fail with:
```shell
"Unsupported parameter: 'reasoning.effort' is not supported with this model."
```
We take a cue from the TypeScript CLI, which does a check on the model
name:
https://github.com/openai/codex/blob/d7245cbbc9d8ff5446da45e5951761103492476d/codex-cli/src/utils/agent/agent-loop.ts#L786-L789
This PR does a similar check, though also adds support for the following
config options:
```
model_reasoning_effort = "low" | "medium" | "high" | "none"
model_reasoning_summary = "auto" | "concise" | "detailed" | "none"
```
This way, if you have a model whose name happens to start with `"o"` (or
`"codex"`?), you can set these to `"none"` to explicitly disable
reasoning, if necessary. (That said, it seems unlikely anyone would use
the Responses API with non-OpenAI models, but we provide an escape
hatch, anyway.)
This PR also updates both the TUI and `codex exec` to show `reasoning
effort` and `reasoning summaries` in the header.
2025-06-02 16:01:34 -07:00
|
|
|
|
|
|
|
|
/// If not "none", the value to use for `reasoning.summary` when making a
|
|
|
|
|
/// request using the Responses API.
|
|
|
|
|
pub model_reasoning_summary: ReasoningSummary,
|
2025-07-10 14:30:33 -07:00
|
|
|
|
2025-08-22 17:12:10 +01:00
|
|
|
/// Optional verbosity control for GPT-5 models (Responses API `text.verbosity`).
|
|
|
|
|
pub model_verbosity: Option<Verbosity>,
|
|
|
|
|
|
2025-07-11 13:30:11 -04:00
|
|
|
/// Base URL for requests to ChatGPT (as opposed to the OpenAI API).
|
|
|
|
|
pub chatgpt_base_url: String,
|
2025-07-18 17:04:04 -07:00
|
|
|
|
2025-07-29 11:22:02 -07:00
|
|
|
/// Include an experimental plan tool that the model can use to update its current plan and status of each step.
|
|
|
|
|
pub include_plan_tool: bool,
|
2025-08-01 09:55:23 -07:00
|
|
|
|
2025-08-15 11:55:53 -04:00
|
|
|
/// Include the `apply_patch` tool for models that benefit from invoking
|
|
|
|
|
/// file edits as a structured tool call. When unset, this falls back to the
|
|
|
|
|
/// model family's default preference.
|
|
|
|
|
pub include_apply_patch_tool: bool,
|
|
|
|
|
|
2025-08-23 22:58:56 -07:00
|
|
|
pub tools_web_search_request: bool,
|
|
|
|
|
|
2025-08-22 18:10:55 -07:00
|
|
|
pub use_experimental_streamable_shell_tool: bool,
|
2025-08-27 17:41:23 -07:00
|
|
|
|
Unified execution (#3288)
## Unified PTY-Based Exec Tool
Note: this requires to have this flag in the config:
`use_experimental_unified_exec_tool=true`
- Adds a PTY-backed interactive exec feature (“unified_exec”) with
session reuse via
session_id, bounded output (128 KiB), and timeout clamping (≤ 60 s).
- Protocol: introduces ResponseItem::UnifiedExec { session_id,
arguments, timeout_ms }.
- Tools: exposes unified_exec as a function tool (Responses API);
excluded from Chat
Completions payload while still supported in tool lists.
- Path handling: resolves commands via PATH (or explicit paths), with
UTF‑8/newline‑aware
truncation (truncate_middle).
- Tests: cover command parsing, path resolution, session
persistence/cleanup, multi‑session
isolation, timeouts, and truncation behavior.
2025-09-10 17:38:11 -07:00
|
|
|
/// If set to `true`, use only the experimental unified exec tool.
|
|
|
|
|
pub use_experimental_unified_exec_tool: bool,
|
|
|
|
|
|
2025-09-26 10:13:37 -07:00
|
|
|
/// If set to `true`, use the experimental official Rust MCP client.
|
|
|
|
|
/// https://github.com/modelcontextprotocol/rust-sdk
|
|
|
|
|
pub use_experimental_use_rmcp_client: bool,
|
|
|
|
|
|
2025-08-27 17:41:23 -07:00
|
|
|
/// Include the `view_image` tool that lets the agent attach a local image path to context.
|
|
|
|
|
pub include_view_image_tool: bool,
|
2025-09-10 13:53:46 -07:00
|
|
|
|
|
|
|
|
/// The active profile name used to derive this `Config` (if any).
|
|
|
|
|
pub active_profile: Option<String>,
|
|
|
|
|
|
2025-10-04 17:41:40 -07:00
|
|
|
/// Tracks whether the Windows onboarding screen has been acknowledged.
|
|
|
|
|
pub windows_wsl_setup_acknowledged: bool,
|
|
|
|
|
|
2025-08-28 12:54:12 -07:00
|
|
|
/// When true, disables burst-paste detection for typed input entirely.
|
|
|
|
|
/// All characters are inserted as they are received, and no buffering
|
|
|
|
|
/// or placeholder replacement will occur for fast keypress bursts.
|
|
|
|
|
pub disable_paste_burst: bool,
|
OpenTelemetry events (#2103)
### Title
## otel
Codex can emit [OpenTelemetry](https://opentelemetry.io/) **log events**
that
describe each run: outbound API requests, streamed responses, user
input,
tool-approval decisions, and the result of every tool invocation. Export
is
**disabled by default** so local runs remain self-contained. Opt in by
adding an
`[otel]` table and choosing an exporter.
```toml
[otel]
environment = "staging" # defaults to "dev"
exporter = "none" # defaults to "none"; set to otlp-http or otlp-grpc to send events
log_user_prompt = false # defaults to false; redact prompt text unless explicitly enabled
```
Codex tags every exported event with `service.name = "codex-cli"`, the
CLI
version, and an `env` attribute so downstream collectors can distinguish
dev/staging/prod traffic. Only telemetry produced inside the
`codex_otel`
crate—the events listed below—is forwarded to the exporter.
### Event catalog
Every event shares a common set of metadata fields: `event.timestamp`,
`conversation.id`, `app.version`, `auth_mode` (when available),
`user.account_id` (when available), `terminal.type`, `model`, and
`slug`.
With OTEL enabled Codex emits the following event types (in addition to
the
metadata above):
- `codex.api_request`
- `cf_ray` (optional)
- `attempt`
- `duration_ms`
- `http.response.status_code` (optional)
- `error.message` (failures)
- `codex.sse_event`
- `event.kind`
- `duration_ms`
- `error.message` (failures)
- `input_token_count` (completion only)
- `output_token_count` (completion only)
- `cached_token_count` (completion only, optional)
- `reasoning_token_count` (completion only, optional)
- `tool_token_count` (completion only)
- `codex.user_prompt`
- `prompt_length`
- `prompt` (redacted unless `log_user_prompt = true`)
- `codex.tool_decision`
- `tool_name`
- `call_id`
- `decision` (`approved`, `approved_for_session`, `denied`, or `abort`)
- `source` (`config` or `user`)
- `codex.tool_result`
- `tool_name`
- `call_id`
- `arguments`
- `duration_ms` (execution time for the tool)
- `success` (`"true"` or `"false"`)
- `output`
### Choosing an exporter
Set `otel.exporter` to control where events go:
- `none` – leaves instrumentation active but skips exporting. This is
the
default.
- `otlp-http` – posts OTLP log records to an OTLP/HTTP collector.
Specify the
endpoint, protocol, and headers your collector expects:
```toml
[otel]
exporter = { otlp-http = {
endpoint = "https://otel.example.com/v1/logs",
protocol = "binary",
headers = { "x-otlp-api-key" = "${OTLP_TOKEN}" }
}}
```
- `otlp-grpc` – streams OTLP log records over gRPC. Provide the endpoint
and any
metadata headers:
```toml
[otel]
exporter = { otlp-grpc = {
endpoint = "https://otel.example.com:4317",
headers = { "x-otlp-meta" = "abc123" }
}}
```
If the exporter is `none` nothing is written anywhere; otherwise you
must run or point to your
own collector. All exporters run on a background batch worker that is
flushed on
shutdown.
If you build Codex from source the OTEL crate is still behind an `otel`
feature
flag; the official prebuilt binaries ship with the feature enabled. When
the
feature is disabled the telemetry hooks become no-ops so the CLI
continues to
function without the extra dependencies.
---------
Co-authored-by: Anton Panasenko <apanasenko@openai.com>
2025-09-29 19:30:55 +01:00
|
|
|
|
|
|
|
|
/// OTEL configuration (exporter type, endpoint, headers, etc.).
|
|
|
|
|
pub otel: crate::config_types::OtelConfig,
|
feat: record messages from user in ~/.codex/history.jsonl (#939)
This is a large change to support a "history" feature like you would
expect in a shell like Bash.
History events are recorded in `$CODEX_HOME/history.jsonl`. Because it
is a JSONL file, it is straightforward to append new entries (as opposed
to the TypeScript file that uses `$CODEX_HOME/history.json`, so to be
valid JSON, each new entry entails rewriting the entire file). Because
it is possible for there to be multiple instances of Codex CLI writing
to `history.jsonl` at once, we use advisory file locking when working
with `history.jsonl` in `codex-rs/core/src/message_history.rs`.
Because we believe history is a sufficiently useful feature, we enable
it by default. Though to provide some safety, we set the file
permissions of `history.jsonl` to be `o600` so that other users on the
system cannot read the user's history. We do not yet support a default
list of `SENSITIVE_PATTERNS` as the TypeScript CLI does:
https://github.com/openai/codex/blob/3fdf9df1335ac9501e3fb0e61715359145711e8b/codex-cli/src/utils/storage/command-history.ts#L10-L17
We are going to take a more conservative approach to this list in the
Rust CLI. For example, while `/\b[A-Za-z0-9-_]{20,}\b/` might exclude
sensitive information like API tokens, it would also exclude valuable
information such as references to Git commits.
As noted in the updated documentation, users can opt-out of history by
adding the following to `config.toml`:
```toml
[history]
persistence = "none"
```
Because `history.jsonl` could, in theory, be quite large, we take a[n
arguably overly pedantic] approach in reading history entries into
memory. Specifically, we start by telling the client the current number
of entries in the history file (`history_entry_count`) as well as the
inode (`history_log_id`) of `history.jsonl` (see the new fields on
`SessionConfiguredEvent`).
The client is responsible for keeping new entries in memory to create a
"local history," but if the user hits up enough times to go "past" the
end of local history, then the client should use the new
`GetHistoryEntryRequest` in the protocol to fetch older entries.
Specifically, it should pass the `history_log_id` it was given
originally and work backwards from `history_entry_count`. (It should
really fetch history in batches rather than one-at-a-time, but that is
something we can improve upon in subsequent PRs.)
The motivation behind this crazy scheme is that it is designed to defend
against:
* The `history.jsonl` being truncated during the session such that the
index into the history is no longer consistent with what had been read
up to that point. We do not yet have logic to enforce a `max_bytes` for
`history.jsonl`, but once we do, we will aspire to implement it in a way
that should result in a new inode for the file on most systems.
* New items from concurrent Codex CLI sessions amending to the history.
Because, in absence of truncation, `history.jsonl` is an append-only
log, so long as the client reads backwards from `history_entry_count`,
it should always get a consistent view of history. (That said, it will
not be able to read _new_ commands from concurrent sessions, but perhaps
we will introduce a `/` command to reload latest history or something
down the road.)
Admittedly, my testing of this feature thus far has been fairly light. I
expect we will find bugs and introduce enhancements/fixes going forward.
2025-05-15 16:26:23 -07:00
|
|
|
}
|
|
|
|
|
|
feat: add support for -c/--config to override individual config items (#1137)
This PR introduces support for `-c`/`--config` so users can override
individual config values on the command line using `--config
name=value`. Example:
```
codex --config model=o4-mini
```
Making it possible to set arbitrary config values on the command line
results in a more flexible configuration scheme and makes it easier to
provide single-line examples that can be copy-pasted from documentation.
Effectively, it means there are four levels of configuration for some
values:
- Default value (e.g., `model` currently defaults to `o4-mini`)
- Value in `config.toml` (e.g., user could override the default to be
`model = "o3"` in their `config.toml`)
- Specifying `-c` or `--config` to override `model` (e.g., user can
include `-c model=o3` in their list of args to Codex)
- If available, a config-specific flag can be used, which takes
precedence over `-c` (e.g., user can specify `--model o3` in their list
of args to Codex)
Now that it is possible to specify anything that could be configured in
`config.toml` on the command line using `-c`, we do not need to have a
custom flag for every possible config option (which can clutter the
output of `--help`). To that end, as part of this PR, we drop support
for the `--disable-response-storage` flag, as users can now specify `-c
disable_response_storage=true` to get the equivalent functionality.
Under the hood, this works by loading the `config.toml` into a
`toml::Value`. Then for each `key=value`, we create a small synthetic
TOML file with `value` so that we can run the TOML parser to get the
equivalent `toml::Value`. We then parse `key` to determine the point in
the original `toml::Value` to do the insert/replace. Once all of the
overrides from `-c` args have been applied, the `toml::Value` is
deserialized into a `ConfigToml` and then the `ConfigOverrides` are
applied, as before.
2025-05-27 23:11:44 -07:00
|
|
|
impl Config {
|
2025-10-03 13:02:26 -07:00
|
|
|
pub async fn load_with_cli_overrides(
|
feat: add support for -c/--config to override individual config items (#1137)
This PR introduces support for `-c`/`--config` so users can override
individual config values on the command line using `--config
name=value`. Example:
```
codex --config model=o4-mini
```
Making it possible to set arbitrary config values on the command line
results in a more flexible configuration scheme and makes it easier to
provide single-line examples that can be copy-pasted from documentation.
Effectively, it means there are four levels of configuration for some
values:
- Default value (e.g., `model` currently defaults to `o4-mini`)
- Value in `config.toml` (e.g., user could override the default to be
`model = "o3"` in their `config.toml`)
- Specifying `-c` or `--config` to override `model` (e.g., user can
include `-c model=o3` in their list of args to Codex)
- If available, a config-specific flag can be used, which takes
precedence over `-c` (e.g., user can specify `--model o3` in their list
of args to Codex)
Now that it is possible to specify anything that could be configured in
`config.toml` on the command line using `-c`, we do not need to have a
custom flag for every possible config option (which can clutter the
output of `--help`). To that end, as part of this PR, we drop support
for the `--disable-response-storage` flag, as users can now specify `-c
disable_response_storage=true` to get the equivalent functionality.
Under the hood, this works by loading the `config.toml` into a
`toml::Value`. Then for each `key=value`, we create a small synthetic
TOML file with `value` so that we can run the TOML parser to get the
equivalent `toml::Value`. We then parse `key` to determine the point in
the original `toml::Value` to do the insert/replace. Once all of the
overrides from `-c` args have been applied, the `toml::Value` is
deserialized into a `ConfigToml` and then the `ConfigOverrides` are
applied, as before.
2025-05-27 23:11:44 -07:00
|
|
|
cli_overrides: Vec<(String, TomlValue)>,
|
|
|
|
|
overrides: ConfigOverrides,
|
|
|
|
|
) -> std::io::Result<Self> {
|
|
|
|
|
let codex_home = find_codex_home()?;
|
|
|
|
|
|
2025-10-03 13:02:26 -07:00
|
|
|
let root_value = load_resolved_config(
|
|
|
|
|
&codex_home,
|
|
|
|
|
cli_overrides,
|
|
|
|
|
crate::config_loader::LoaderOverrides::default(),
|
|
|
|
|
)
|
|
|
|
|
.await?;
|
feat: add support for -c/--config to override individual config items (#1137)
This PR introduces support for `-c`/`--config` so users can override
individual config values on the command line using `--config
name=value`. Example:
```
codex --config model=o4-mini
```
Making it possible to set arbitrary config values on the command line
results in a more flexible configuration scheme and makes it easier to
provide single-line examples that can be copy-pasted from documentation.
Effectively, it means there are four levels of configuration for some
values:
- Default value (e.g., `model` currently defaults to `o4-mini`)
- Value in `config.toml` (e.g., user could override the default to be
`model = "o3"` in their `config.toml`)
- Specifying `-c` or `--config` to override `model` (e.g., user can
include `-c model=o3` in their list of args to Codex)
- If available, a config-specific flag can be used, which takes
precedence over `-c` (e.g., user can specify `--model o3` in their list
of args to Codex)
Now that it is possible to specify anything that could be configured in
`config.toml` on the command line using `-c`, we do not need to have a
custom flag for every possible config option (which can clutter the
output of `--help`). To that end, as part of this PR, we drop support
for the `--disable-response-storage` flag, as users can now specify `-c
disable_response_storage=true` to get the equivalent functionality.
Under the hood, this works by loading the `config.toml` into a
`toml::Value`. Then for each `key=value`, we create a small synthetic
TOML file with `value` so that we can run the TOML parser to get the
equivalent `toml::Value`. We then parse `key` to determine the point in
the original `toml::Value` to do the insert/replace. Once all of the
overrides from `-c` args have been applied, the `toml::Value` is
deserialized into a `ConfigToml` and then the `ConfigOverrides` are
applied, as before.
2025-05-27 23:11:44 -07:00
|
|
|
|
|
|
|
|
let cfg: ConfigToml = root_value.try_into().map_err(|e| {
|
|
|
|
|
tracing::error!("Failed to deserialize overridden config: {e}");
|
|
|
|
|
std::io::Error::new(std::io::ErrorKind::InvalidData, e)
|
|
|
|
|
})?;
|
|
|
|
|
|
|
|
|
|
Self::load_from_base_config_with_overrides(cfg, overrides, codex_home)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2025-10-03 13:02:26 -07:00
|
|
|
pub async fn load_config_as_toml_with_cli_overrides(
|
2025-08-07 09:27:38 -07:00
|
|
|
codex_home: &Path,
|
|
|
|
|
cli_overrides: Vec<(String, TomlValue)>,
|
|
|
|
|
) -> std::io::Result<ConfigToml> {
|
2025-10-03 13:02:26 -07:00
|
|
|
let root_value = load_resolved_config(
|
|
|
|
|
codex_home,
|
|
|
|
|
cli_overrides,
|
|
|
|
|
crate::config_loader::LoaderOverrides::default(),
|
|
|
|
|
)
|
|
|
|
|
.await?;
|
2025-08-07 09:27:38 -07:00
|
|
|
|
|
|
|
|
let cfg: ConfigToml = root_value.try_into().map_err(|e| {
|
|
|
|
|
tracing::error!("Failed to deserialize overridden config: {e}");
|
|
|
|
|
std::io::Error::new(std::io::ErrorKind::InvalidData, e)
|
|
|
|
|
})?;
|
|
|
|
|
|
|
|
|
|
Ok(cfg)
|
|
|
|
|
}
|
|
|
|
|
|
2025-10-03 13:02:26 -07:00
|
|
|
async fn load_resolved_config(
|
|
|
|
|
codex_home: &Path,
|
|
|
|
|
cli_overrides: Vec<(String, TomlValue)>,
|
|
|
|
|
overrides: crate::config_loader::LoaderOverrides,
|
|
|
|
|
) -> std::io::Result<TomlValue> {
|
|
|
|
|
let layers = load_config_layers_with_overrides(codex_home, overrides).await?;
|
|
|
|
|
Ok(apply_overlays(layers, cli_overrides))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn apply_overlays(
|
|
|
|
|
layers: LoadedConfigLayers,
|
|
|
|
|
cli_overrides: Vec<(String, TomlValue)>,
|
|
|
|
|
) -> TomlValue {
|
|
|
|
|
let LoadedConfigLayers {
|
|
|
|
|
mut base,
|
|
|
|
|
managed_config,
|
|
|
|
|
managed_preferences,
|
|
|
|
|
} = layers;
|
|
|
|
|
|
|
|
|
|
for (path, value) in cli_overrides.into_iter() {
|
|
|
|
|
apply_toml_override(&mut base, &path, value);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for overlay in [managed_config, managed_preferences].into_iter().flatten() {
|
|
|
|
|
merge_toml_values(&mut base, &overlay);
|
feat: add support for -c/--config to override individual config items (#1137)
This PR introduces support for `-c`/`--config` so users can override
individual config values on the command line using `--config
name=value`. Example:
```
codex --config model=o4-mini
```
Making it possible to set arbitrary config values on the command line
results in a more flexible configuration scheme and makes it easier to
provide single-line examples that can be copy-pasted from documentation.
Effectively, it means there are four levels of configuration for some
values:
- Default value (e.g., `model` currently defaults to `o4-mini`)
- Value in `config.toml` (e.g., user could override the default to be
`model = "o3"` in their `config.toml`)
- Specifying `-c` or `--config` to override `model` (e.g., user can
include `-c model=o3` in their list of args to Codex)
- If available, a config-specific flag can be used, which takes
precedence over `-c` (e.g., user can specify `--model o3` in their list
of args to Codex)
Now that it is possible to specify anything that could be configured in
`config.toml` on the command line using `-c`, we do not need to have a
custom flag for every possible config option (which can clutter the
output of `--help`). To that end, as part of this PR, we drop support
for the `--disable-response-storage` flag, as users can now specify `-c
disable_response_storage=true` to get the equivalent functionality.
Under the hood, this works by loading the `config.toml` into a
`toml::Value`. Then for each `key=value`, we create a small synthetic
TOML file with `value` so that we can run the TOML parser to get the
equivalent `toml::Value`. We then parse `key` to determine the point in
the original `toml::Value` to do the insert/replace. Once all of the
overrides from `-c` args have been applied, the `toml::Value` is
deserialized into a `ConfigToml` and then the `ConfigOverrides` are
applied, as before.
2025-05-27 23:11:44 -07:00
|
|
|
}
|
2025-10-03 13:02:26 -07:00
|
|
|
|
|
|
|
|
base
|
feat: add support for -c/--config to override individual config items (#1137)
This PR introduces support for `-c`/`--config` so users can override
individual config values on the command line using `--config
name=value`. Example:
```
codex --config model=o4-mini
```
Making it possible to set arbitrary config values on the command line
results in a more flexible configuration scheme and makes it easier to
provide single-line examples that can be copy-pasted from documentation.
Effectively, it means there are four levels of configuration for some
values:
- Default value (e.g., `model` currently defaults to `o4-mini`)
- Value in `config.toml` (e.g., user could override the default to be
`model = "o3"` in their `config.toml`)
- Specifying `-c` or `--config` to override `model` (e.g., user can
include `-c model=o3` in their list of args to Codex)
- If available, a config-specific flag can be used, which takes
precedence over `-c` (e.g., user can specify `--model o3` in their list
of args to Codex)
Now that it is possible to specify anything that could be configured in
`config.toml` on the command line using `-c`, we do not need to have a
custom flag for every possible config option (which can clutter the
output of `--help`). To that end, as part of this PR, we drop support
for the `--disable-response-storage` flag, as users can now specify `-c
disable_response_storage=true` to get the equivalent functionality.
Under the hood, this works by loading the `config.toml` into a
`toml::Value`. Then for each `key=value`, we create a small synthetic
TOML file with `value` so that we can run the TOML parser to get the
equivalent `toml::Value`. We then parse `key` to determine the point in
the original `toml::Value` to do the insert/replace. Once all of the
overrides from `-c` args have been applied, the `toml::Value` is
deserialized into a `ConfigToml` and then the `ConfigOverrides` are
applied, as before.
2025-05-27 23:11:44 -07:00
|
|
|
}
|
|
|
|
|
|
2025-10-03 13:02:26 -07:00
|
|
|
pub async fn load_global_mcp_servers(
|
2025-09-14 21:30:56 -07:00
|
|
|
codex_home: &Path,
|
|
|
|
|
) -> std::io::Result<BTreeMap<String, McpServerConfig>> {
|
2025-10-03 13:02:26 -07:00
|
|
|
let root_value = load_config_as_toml(codex_home).await?;
|
2025-09-14 21:30:56 -07:00
|
|
|
let Some(servers_value) = root_value.get("mcp_servers") else {
|
|
|
|
|
return Ok(BTreeMap::new());
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
servers_value
|
|
|
|
|
.clone()
|
|
|
|
|
.try_into()
|
|
|
|
|
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn write_global_mcp_servers(
|
|
|
|
|
codex_home: &Path,
|
|
|
|
|
servers: &BTreeMap<String, McpServerConfig>,
|
|
|
|
|
) -> std::io::Result<()> {
|
|
|
|
|
let config_path = codex_home.join(CONFIG_TOML_FILE);
|
|
|
|
|
let mut doc = match std::fs::read_to_string(&config_path) {
|
|
|
|
|
Ok(contents) => contents
|
|
|
|
|
.parse::<DocumentMut>()
|
|
|
|
|
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?,
|
|
|
|
|
Err(e) if e.kind() == std::io::ErrorKind::NotFound => DocumentMut::new(),
|
|
|
|
|
Err(e) => return Err(e),
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
doc.as_table_mut().remove("mcp_servers");
|
|
|
|
|
|
|
|
|
|
if !servers.is_empty() {
|
|
|
|
|
let mut table = TomlTable::new();
|
|
|
|
|
table.set_implicit(true);
|
|
|
|
|
doc["mcp_servers"] = TomlItem::Table(table);
|
|
|
|
|
|
|
|
|
|
for (name, config) in servers {
|
|
|
|
|
let mut entry = TomlTable::new();
|
|
|
|
|
entry.set_implicit(false);
|
2025-09-26 18:24:01 -07:00
|
|
|
match &config.transport {
|
|
|
|
|
McpServerTransportConfig::Stdio { command, args, env } => {
|
|
|
|
|
entry["command"] = toml_edit::value(command.clone());
|
|
|
|
|
|
|
|
|
|
if !args.is_empty() {
|
|
|
|
|
let mut args_array = TomlArray::new();
|
|
|
|
|
for arg in args {
|
|
|
|
|
args_array.push(arg.clone());
|
|
|
|
|
}
|
|
|
|
|
entry["args"] = TomlItem::Value(args_array.into());
|
|
|
|
|
}
|
2025-09-14 21:30:56 -07:00
|
|
|
|
2025-09-26 18:24:01 -07:00
|
|
|
if let Some(env) = env
|
|
|
|
|
&& !env.is_empty()
|
|
|
|
|
{
|
|
|
|
|
let mut env_table = TomlTable::new();
|
|
|
|
|
env_table.set_implicit(false);
|
|
|
|
|
let mut pairs: Vec<_> = env.iter().collect();
|
|
|
|
|
pairs.sort_by(|(a, _), (b, _)| a.cmp(b));
|
|
|
|
|
for (key, value) in pairs {
|
|
|
|
|
env_table.insert(key, toml_edit::value(value.clone()));
|
|
|
|
|
}
|
|
|
|
|
entry["env"] = TomlItem::Table(env_table);
|
|
|
|
|
}
|
2025-09-14 21:30:56 -07:00
|
|
|
}
|
2025-09-26 18:24:01 -07:00
|
|
|
McpServerTransportConfig::StreamableHttp { url, bearer_token } => {
|
|
|
|
|
entry["url"] = toml_edit::value(url.clone());
|
|
|
|
|
if let Some(token) = bearer_token {
|
|
|
|
|
entry["bearer_token"] = toml_edit::value(token.clone());
|
|
|
|
|
}
|
2025-09-14 21:30:56 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2025-09-22 10:30:59 -07:00
|
|
|
if let Some(timeout) = config.startup_timeout_sec {
|
|
|
|
|
entry["startup_timeout_sec"] = toml_edit::value(timeout.as_secs_f64());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if let Some(timeout) = config.tool_timeout_sec {
|
|
|
|
|
entry["tool_timeout_sec"] = toml_edit::value(timeout.as_secs_f64());
|
2025-09-14 21:30:56 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
doc["mcp_servers"][name.as_str()] = TomlItem::Table(entry);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::fs::create_dir_all(codex_home)?;
|
|
|
|
|
let tmp_file = NamedTempFile::new_in(codex_home)?;
|
|
|
|
|
std::fs::write(tmp_file.path(), doc.to_string())?;
|
|
|
|
|
tmp_file.persist(config_path).map_err(|err| err.error)?;
|
|
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
2025-09-10 16:01:31 -07:00
|
|
|
fn set_project_trusted_inner(doc: &mut DocumentMut, project_path: &Path) -> anyhow::Result<()> {
|
2025-08-21 13:20:36 -07:00
|
|
|
// Ensure we render a human-friendly structure:
|
|
|
|
|
//
|
|
|
|
|
// [projects]
|
|
|
|
|
// [projects."/path/to/project"]
|
|
|
|
|
// trust_level = "trusted"
|
|
|
|
|
//
|
|
|
|
|
// rather than inline tables like:
|
|
|
|
|
//
|
|
|
|
|
// [projects]
|
|
|
|
|
// "/path/to/project" = { trust_level = "trusted" }
|
2025-08-07 09:27:38 -07:00
|
|
|
let project_key = project_path.to_string_lossy().to_string();
|
2025-08-21 13:20:36 -07:00
|
|
|
|
|
|
|
|
// Ensure top-level `projects` exists as a non-inline, explicit table. If it
|
|
|
|
|
// exists but was previously represented as a non-table (e.g., inline),
|
|
|
|
|
// replace it with an explicit table.
|
|
|
|
|
{
|
|
|
|
|
let root = doc.as_table_mut();
|
2025-09-10 16:01:31 -07:00
|
|
|
// If `projects` exists but isn't a standard table (e.g., it's an inline table),
|
|
|
|
|
// convert it to an explicit table while preserving existing entries.
|
|
|
|
|
let existing_projects = root.get("projects").cloned();
|
|
|
|
|
if existing_projects.as_ref().is_none_or(|i| !i.is_table()) {
|
|
|
|
|
let mut projects_tbl = toml_edit::Table::new();
|
|
|
|
|
projects_tbl.set_implicit(true);
|
|
|
|
|
|
|
|
|
|
// If there was an existing inline table, migrate its entries to explicit tables.
|
|
|
|
|
if let Some(inline_tbl) = existing_projects.as_ref().and_then(|i| i.as_inline_table()) {
|
|
|
|
|
for (k, v) in inline_tbl.iter() {
|
|
|
|
|
if let Some(inner_tbl) = v.as_inline_table() {
|
|
|
|
|
let new_tbl = inner_tbl.clone().into_table();
|
|
|
|
|
projects_tbl.insert(k, toml_edit::Item::Table(new_tbl));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
root.insert("projects", toml_edit::Item::Table(projects_tbl));
|
2025-08-21 13:20:36 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
let Some(projects_tbl) = doc["projects"].as_table_mut() else {
|
|
|
|
|
return Err(anyhow::anyhow!(
|
|
|
|
|
"projects table missing after initialization"
|
|
|
|
|
));
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// Ensure the per-project entry is its own explicit table. If it exists but
|
|
|
|
|
// is not a table (e.g., an inline table), replace it with an explicit table.
|
|
|
|
|
let needs_proj_table = !projects_tbl.contains_key(project_key.as_str())
|
|
|
|
|
|| projects_tbl
|
|
|
|
|
.get(project_key.as_str())
|
|
|
|
|
.and_then(|i| i.as_table())
|
|
|
|
|
.is_none();
|
|
|
|
|
if needs_proj_table {
|
|
|
|
|
projects_tbl.insert(project_key.as_str(), toml_edit::table());
|
|
|
|
|
}
|
|
|
|
|
let Some(proj_tbl) = projects_tbl
|
|
|
|
|
.get_mut(project_key.as_str())
|
|
|
|
|
.and_then(|i| i.as_table_mut())
|
|
|
|
|
else {
|
2025-09-30 03:10:33 -07:00
|
|
|
return Err(anyhow::anyhow!("project table missing for {project_key}"));
|
2025-08-21 13:20:36 -07:00
|
|
|
};
|
|
|
|
|
proj_tbl.set_implicit(false);
|
|
|
|
|
proj_tbl["trust_level"] = toml_edit::value("trusted");
|
2025-09-10 16:01:31 -07:00
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Patch `CODEX_HOME/config.toml` project state.
|
|
|
|
|
/// Use with caution.
|
|
|
|
|
pub fn set_project_trusted(codex_home: &Path, project_path: &Path) -> anyhow::Result<()> {
|
|
|
|
|
let config_path = codex_home.join(CONFIG_TOML_FILE);
|
|
|
|
|
// Parse existing config if present; otherwise start a new document.
|
|
|
|
|
let mut doc = match std::fs::read_to_string(config_path.clone()) {
|
|
|
|
|
Ok(s) => s.parse::<DocumentMut>()?,
|
|
|
|
|
Err(e) if e.kind() == std::io::ErrorKind::NotFound => DocumentMut::new(),
|
|
|
|
|
Err(e) => return Err(e.into()),
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
set_project_trusted_inner(&mut doc, project_path)?;
|
2025-08-07 09:27:38 -07:00
|
|
|
|
|
|
|
|
// ensure codex_home exists
|
|
|
|
|
std::fs::create_dir_all(codex_home)?;
|
|
|
|
|
|
|
|
|
|
// create a tmp_file
|
|
|
|
|
let tmp_file = NamedTempFile::new_in(codex_home)?;
|
|
|
|
|
std::fs::write(tmp_file.path(), doc.to_string())?;
|
|
|
|
|
|
|
|
|
|
// atomically move the tmp file into config.toml
|
|
|
|
|
tmp_file.persist(config_path)?;
|
|
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
2025-10-04 17:41:40 -07:00
|
|
|
/// Persist the acknowledgement flag for the Windows onboarding screen.
|
|
|
|
|
pub fn set_windows_wsl_setup_acknowledged(
|
|
|
|
|
codex_home: &Path,
|
|
|
|
|
acknowledged: bool,
|
|
|
|
|
) -> anyhow::Result<()> {
|
|
|
|
|
let config_path = codex_home.join(CONFIG_TOML_FILE);
|
|
|
|
|
let mut doc = match std::fs::read_to_string(config_path.clone()) {
|
|
|
|
|
Ok(s) => s.parse::<DocumentMut>()?,
|
|
|
|
|
Err(e) if e.kind() == std::io::ErrorKind::NotFound => DocumentMut::new(),
|
|
|
|
|
Err(e) => return Err(e.into()),
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
doc["windows_wsl_setup_acknowledged"] = toml_edit::value(acknowledged);
|
|
|
|
|
|
|
|
|
|
std::fs::create_dir_all(codex_home)?;
|
|
|
|
|
|
|
|
|
|
let tmp_file = NamedTempFile::new_in(codex_home)?;
|
|
|
|
|
std::fs::write(tmp_file.path(), doc.to_string())?;
|
|
|
|
|
tmp_file.persist(config_path)?;
|
|
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
2025-09-11 15:04:29 -07:00
|
|
|
fn ensure_profile_table<'a>(
|
|
|
|
|
doc: &'a mut DocumentMut,
|
|
|
|
|
profile_name: &str,
|
|
|
|
|
) -> anyhow::Result<&'a mut toml_edit::Table> {
|
|
|
|
|
let mut created_profiles_table = false;
|
|
|
|
|
{
|
|
|
|
|
let root = doc.as_table_mut();
|
|
|
|
|
let needs_table = !root.contains_key("profiles")
|
|
|
|
|
|| root
|
|
|
|
|
.get("profiles")
|
|
|
|
|
.and_then(|item| item.as_table())
|
|
|
|
|
.is_none();
|
|
|
|
|
if needs_table {
|
|
|
|
|
root.insert("profiles", toml_edit::table());
|
|
|
|
|
created_profiles_table = true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let Some(profiles_table) = doc["profiles"].as_table_mut() else {
|
|
|
|
|
return Err(anyhow::anyhow!(
|
|
|
|
|
"profiles table missing after initialization"
|
|
|
|
|
));
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
if created_profiles_table {
|
|
|
|
|
profiles_table.set_implicit(true);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let needs_profile_table = !profiles_table.contains_key(profile_name)
|
|
|
|
|
|| profiles_table
|
|
|
|
|
.get(profile_name)
|
|
|
|
|
.and_then(|item| item.as_table())
|
|
|
|
|
.is_none();
|
|
|
|
|
if needs_profile_table {
|
|
|
|
|
profiles_table.insert(profile_name, toml_edit::table());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let Some(profile_table) = profiles_table
|
|
|
|
|
.get_mut(profile_name)
|
|
|
|
|
.and_then(|item| item.as_table_mut())
|
|
|
|
|
else {
|
|
|
|
|
return Err(anyhow::anyhow!(format!(
|
|
|
|
|
"profile table missing for {profile_name}"
|
|
|
|
|
)));
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
profile_table.set_implicit(false);
|
|
|
|
|
Ok(profile_table)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// TODO(jif) refactor config persistence.
|
|
|
|
|
pub async fn persist_model_selection(
|
|
|
|
|
codex_home: &Path,
|
|
|
|
|
active_profile: Option<&str>,
|
|
|
|
|
model: &str,
|
|
|
|
|
effort: Option<ReasoningEffort>,
|
|
|
|
|
) -> anyhow::Result<()> {
|
|
|
|
|
let config_path = codex_home.join(CONFIG_TOML_FILE);
|
|
|
|
|
let serialized = match tokio::fs::read_to_string(&config_path).await {
|
|
|
|
|
Ok(contents) => contents,
|
|
|
|
|
Err(err) if err.kind() == std::io::ErrorKind::NotFound => String::new(),
|
|
|
|
|
Err(err) => return Err(err.into()),
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
let mut doc = if serialized.is_empty() {
|
|
|
|
|
DocumentMut::new()
|
|
|
|
|
} else {
|
|
|
|
|
serialized.parse::<DocumentMut>()?
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
if let Some(profile_name) = active_profile {
|
|
|
|
|
let profile_table = ensure_profile_table(&mut doc, profile_name)?;
|
|
|
|
|
profile_table["model"] = toml_edit::value(model);
|
2025-09-12 12:06:33 -07:00
|
|
|
match effort {
|
|
|
|
|
Some(effort) => {
|
|
|
|
|
profile_table["model_reasoning_effort"] = toml_edit::value(effort.to_string());
|
|
|
|
|
}
|
|
|
|
|
None => {
|
|
|
|
|
profile_table.remove("model_reasoning_effort");
|
|
|
|
|
}
|
2025-09-11 15:04:29 -07:00
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
let table = doc.as_table_mut();
|
|
|
|
|
table["model"] = toml_edit::value(model);
|
2025-09-12 12:06:33 -07:00
|
|
|
match effort {
|
|
|
|
|
Some(effort) => {
|
|
|
|
|
table["model_reasoning_effort"] = toml_edit::value(effort.to_string());
|
|
|
|
|
}
|
|
|
|
|
None => {
|
|
|
|
|
table.remove("model_reasoning_effort");
|
|
|
|
|
}
|
2025-09-11 15:04:29 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// TODO(jif) refactor the home creation
|
|
|
|
|
tokio::fs::create_dir_all(codex_home)
|
|
|
|
|
.await
|
|
|
|
|
.with_context(|| {
|
|
|
|
|
format!(
|
|
|
|
|
"failed to create Codex home directory at {}",
|
|
|
|
|
codex_home.display()
|
|
|
|
|
)
|
|
|
|
|
})?;
|
|
|
|
|
|
|
|
|
|
tokio::fs::write(&config_path, doc.to_string())
|
|
|
|
|
.await
|
|
|
|
|
.with_context(|| format!("failed to persist config.toml at {}", config_path.display()))?;
|
|
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
feat: add support for -c/--config to override individual config items (#1137)
This PR introduces support for `-c`/`--config` so users can override
individual config values on the command line using `--config
name=value`. Example:
```
codex --config model=o4-mini
```
Making it possible to set arbitrary config values on the command line
results in a more flexible configuration scheme and makes it easier to
provide single-line examples that can be copy-pasted from documentation.
Effectively, it means there are four levels of configuration for some
values:
- Default value (e.g., `model` currently defaults to `o4-mini`)
- Value in `config.toml` (e.g., user could override the default to be
`model = "o3"` in their `config.toml`)
- Specifying `-c` or `--config` to override `model` (e.g., user can
include `-c model=o3` in their list of args to Codex)
- If available, a config-specific flag can be used, which takes
precedence over `-c` (e.g., user can specify `--model o3` in their list
of args to Codex)
Now that it is possible to specify anything that could be configured in
`config.toml` on the command line using `-c`, we do not need to have a
custom flag for every possible config option (which can clutter the
output of `--help`). To that end, as part of this PR, we drop support
for the `--disable-response-storage` flag, as users can now specify `-c
disable_response_storage=true` to get the equivalent functionality.
Under the hood, this works by loading the `config.toml` into a
`toml::Value`. Then for each `key=value`, we create a small synthetic
TOML file with `value` so that we can run the TOML parser to get the
equivalent `toml::Value`. We then parse `key` to determine the point in
the original `toml::Value` to do the insert/replace. Once all of the
overrides from `-c` args have been applied, the `toml::Value` is
deserialized into a `ConfigToml` and then the `ConfigOverrides` are
applied, as before.
2025-05-27 23:11:44 -07:00
|
|
|
/// Apply a single dotted-path override onto a TOML value.
|
|
|
|
|
fn apply_toml_override(root: &mut TomlValue, path: &str, value: TomlValue) {
|
|
|
|
|
use toml::value::Table;
|
|
|
|
|
|
|
|
|
|
let segments: Vec<&str> = path.split('.').collect();
|
|
|
|
|
let mut current = root;
|
|
|
|
|
|
|
|
|
|
for (idx, segment) in segments.iter().enumerate() {
|
|
|
|
|
let is_last = idx == segments.len() - 1;
|
|
|
|
|
|
|
|
|
|
if is_last {
|
|
|
|
|
match current {
|
|
|
|
|
TomlValue::Table(table) => {
|
|
|
|
|
table.insert(segment.to_string(), value);
|
|
|
|
|
}
|
|
|
|
|
_ => {
|
|
|
|
|
let mut table = Table::new();
|
|
|
|
|
table.insert(segment.to_string(), value);
|
|
|
|
|
*current = TomlValue::Table(table);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Traverse or create intermediate object.
|
|
|
|
|
match current {
|
|
|
|
|
TomlValue::Table(table) => {
|
|
|
|
|
current = table
|
|
|
|
|
.entry(segment.to_string())
|
|
|
|
|
.or_insert_with(|| TomlValue::Table(Table::new()));
|
|
|
|
|
}
|
|
|
|
|
_ => {
|
|
|
|
|
*current = TomlValue::Table(Table::new());
|
|
|
|
|
if let TomlValue::Table(tbl) = current {
|
|
|
|
|
current = tbl
|
|
|
|
|
.entry(segment.to_string())
|
|
|
|
|
.or_insert_with(|| TomlValue::Table(Table::new()));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
fix: overhaul SandboxPolicy and config loading in Rust (#732)
Previous to this PR, `SandboxPolicy` was a bit difficult to work with:
https://github.com/openai/codex/blob/237f8a11e11fdcc793a09e787e48215676d9b95b/codex-rs/core/src/protocol.rs#L98-L108
Specifically:
* It was an `enum` and therefore options were mutually exclusive as
opposed to additive.
* It defined things in terms of what the agent _could not_ do as opposed
to what they _could_ do. This made things hard to support because we
would prefer to build up a sandbox config by starting with something
extremely restrictive and only granting permissions for things the user
has explicitly allowed.
This PR changes things substantially by redefining the policy in terms
of two concepts:
* A `SandboxPermission` enum that defines permissions that can be
granted to the agent/sandbox.
* A `SandboxPolicy` that internally stores a `Vec<SandboxPermission>`,
but externally exposes a simpler API that can be used to configure
Seatbelt/Landlock.
Previous to this PR, we supported a `--sandbox` flag that effectively
mapped to an enum value in `SandboxPolicy`. Though now that
`SandboxPolicy` is a wrapper around `Vec<SandboxPermission>`, the single
`--sandbox` flag no longer makes sense. While I could have turned it
into a flag that the user can specify multiple times, I think the
current values to use with such a flag are long and potentially messy,
so for the moment, I have dropped support for `--sandbox` altogether and
we can bring it back once we have figured out the naming thing.
Since `--sandbox` is gone, users now have to specify `--full-auto` to
get a sandbox that allows writes in `cwd`. Admittedly, there is no clean
way to specify the equivalent of `--full-auto` in your `config.toml`
right now, so we will have to revisit that, as well.
Because `Config` presents a `SandboxPolicy` field and `SandboxPolicy`
changed considerably, I had to overhaul how config loading works, as
well. There are now two distinct concepts, `ConfigToml` and `Config`:
* `ConfigToml` is the deserialization of `~/.codex/config.toml`. As one
might expect, every field is `Optional` and it is `#[derive(Deserialize,
Default)]`. Consistent use of `Optional` makes it clear what the user
has specified explicitly.
* `Config` is the "normalized config" and is produced by merging
`ConfigToml` with `ConfigOverrides`. Where `ConfigToml` contains a raw
`Option<Vec<SandboxPermission>>`, `Config` presents only the final
`SandboxPolicy`.
The changes to `core/src/exec.rs` and `core/src/linux.rs` merit extra
special attention to ensure we are faithfully mapping the
`SandboxPolicy` to the Seatbelt and Landlock configs, respectively.
Also, take note that `core/src/seatbelt_readonly_policy.sbpl` has been
renamed to `codex-rs/core/src/seatbelt_base_policy.sbpl` and that
`(allow file-read*)` has been removed from the `.sbpl` file as now this
is added to the policy in `core/src/exec.rs` when
`sandbox_policy.has_full_disk_read_access()` is `true`.
2025-04-29 15:01:16 -07:00
|
|
|
/// Base config deserialized from ~/.codex/config.toml.
|
2025-09-11 23:44:17 -07:00
|
|
|
#[derive(Deserialize, Debug, Clone, Default, PartialEq)]
|
fix: overhaul SandboxPolicy and config loading in Rust (#732)
Previous to this PR, `SandboxPolicy` was a bit difficult to work with:
https://github.com/openai/codex/blob/237f8a11e11fdcc793a09e787e48215676d9b95b/codex-rs/core/src/protocol.rs#L98-L108
Specifically:
* It was an `enum` and therefore options were mutually exclusive as
opposed to additive.
* It defined things in terms of what the agent _could not_ do as opposed
to what they _could_ do. This made things hard to support because we
would prefer to build up a sandbox config by starting with something
extremely restrictive and only granting permissions for things the user
has explicitly allowed.
This PR changes things substantially by redefining the policy in terms
of two concepts:
* A `SandboxPermission` enum that defines permissions that can be
granted to the agent/sandbox.
* A `SandboxPolicy` that internally stores a `Vec<SandboxPermission>`,
but externally exposes a simpler API that can be used to configure
Seatbelt/Landlock.
Previous to this PR, we supported a `--sandbox` flag that effectively
mapped to an enum value in `SandboxPolicy`. Though now that
`SandboxPolicy` is a wrapper around `Vec<SandboxPermission>`, the single
`--sandbox` flag no longer makes sense. While I could have turned it
into a flag that the user can specify multiple times, I think the
current values to use with such a flag are long and potentially messy,
so for the moment, I have dropped support for `--sandbox` altogether and
we can bring it back once we have figured out the naming thing.
Since `--sandbox` is gone, users now have to specify `--full-auto` to
get a sandbox that allows writes in `cwd`. Admittedly, there is no clean
way to specify the equivalent of `--full-auto` in your `config.toml`
right now, so we will have to revisit that, as well.
Because `Config` presents a `SandboxPolicy` field and `SandboxPolicy`
changed considerably, I had to overhaul how config loading works, as
well. There are now two distinct concepts, `ConfigToml` and `Config`:
* `ConfigToml` is the deserialization of `~/.codex/config.toml`. As one
might expect, every field is `Optional` and it is `#[derive(Deserialize,
Default)]`. Consistent use of `Optional` makes it clear what the user
has specified explicitly.
* `Config` is the "normalized config" and is produced by merging
`ConfigToml` with `ConfigOverrides`. Where `ConfigToml` contains a raw
`Option<Vec<SandboxPermission>>`, `Config` presents only the final
`SandboxPolicy`.
The changes to `core/src/exec.rs` and `core/src/linux.rs` merit extra
special attention to ensure we are faithfully mapping the
`SandboxPolicy` to the Seatbelt and Landlock configs, respectively.
Also, take note that `core/src/seatbelt_readonly_policy.sbpl` has been
renamed to `codex-rs/core/src/seatbelt_base_policy.sbpl` and that
`(allow file-read*)` has been removed from the `.sbpl` file as now this
is added to the policy in `core/src/exec.rs` when
`sandbox_policy.has_full_disk_read_access()` is `true`.
2025-04-29 15:01:16 -07:00
|
|
|
pub struct ConfigToml {
|
|
|
|
|
/// Optional override of model selection.
|
|
|
|
|
pub model: Option<String>,
|
Review Mode (Core) (#3401)
## 📝 Review Mode -- Core
This PR introduces the Core implementation for Review mode:
- New op `Op::Review { prompt: String }:` spawns a child review task
with isolated context, a review‑specific system prompt, and a
`Config.review_model`.
- `EnteredReviewMode`: emitted when the child review session starts.
Every event from this point onwards reflects the review session.
- `ExitedReviewMode(Option<ReviewOutputEvent>)`: emitted when the review
finishes or is interrupted, with optional structured findings:
```json
{
"findings": [
{
"title": "<≤ 80 chars, imperative>",
"body": "<valid Markdown explaining *why* this is a problem; cite files/lines/functions>",
"confidence_score": <float 0.0-1.0>,
"priority": <int 0-3>,
"code_location": {
"absolute_file_path": "<file path>",
"line_range": {"start": <int>, "end": <int>}
}
}
],
"overall_correctness": "patch is correct" | "patch is incorrect",
"overall_explanation": "<1-3 sentence explanation justifying the overall_correctness verdict>",
"overall_confidence_score": <float 0.0-1.0>
}
```
## Questions
### Why separate out its own message history?
We want the review thread to match the training of our review models as
much as possible -- that means using a custom prompt, removing user
instructions, and starting a clean chat history.
We also want to make sure the review thread doesn't leak into the parent
thread.
### Why do this as a mode, vs. sub-agents?
1. We want review to be a synchronous task, so it's fine for now to do a
bespoke implementation.
2. We're still unclear about the final structure for sub-agents. We'd
prefer to land this quickly and then refactor into sub-agents without
rushing that implementation.
2025-09-12 16:25:10 -07:00
|
|
|
/// Review model override used by the `/review` feature.
|
|
|
|
|
pub review_model: Option<String>,
|
fix: overhaul SandboxPolicy and config loading in Rust (#732)
Previous to this PR, `SandboxPolicy` was a bit difficult to work with:
https://github.com/openai/codex/blob/237f8a11e11fdcc793a09e787e48215676d9b95b/codex-rs/core/src/protocol.rs#L98-L108
Specifically:
* It was an `enum` and therefore options were mutually exclusive as
opposed to additive.
* It defined things in terms of what the agent _could not_ do as opposed
to what they _could_ do. This made things hard to support because we
would prefer to build up a sandbox config by starting with something
extremely restrictive and only granting permissions for things the user
has explicitly allowed.
This PR changes things substantially by redefining the policy in terms
of two concepts:
* A `SandboxPermission` enum that defines permissions that can be
granted to the agent/sandbox.
* A `SandboxPolicy` that internally stores a `Vec<SandboxPermission>`,
but externally exposes a simpler API that can be used to configure
Seatbelt/Landlock.
Previous to this PR, we supported a `--sandbox` flag that effectively
mapped to an enum value in `SandboxPolicy`. Though now that
`SandboxPolicy` is a wrapper around `Vec<SandboxPermission>`, the single
`--sandbox` flag no longer makes sense. While I could have turned it
into a flag that the user can specify multiple times, I think the
current values to use with such a flag are long and potentially messy,
so for the moment, I have dropped support for `--sandbox` altogether and
we can bring it back once we have figured out the naming thing.
Since `--sandbox` is gone, users now have to specify `--full-auto` to
get a sandbox that allows writes in `cwd`. Admittedly, there is no clean
way to specify the equivalent of `--full-auto` in your `config.toml`
right now, so we will have to revisit that, as well.
Because `Config` presents a `SandboxPolicy` field and `SandboxPolicy`
changed considerably, I had to overhaul how config loading works, as
well. There are now two distinct concepts, `ConfigToml` and `Config`:
* `ConfigToml` is the deserialization of `~/.codex/config.toml`. As one
might expect, every field is `Optional` and it is `#[derive(Deserialize,
Default)]`. Consistent use of `Optional` makes it clear what the user
has specified explicitly.
* `Config` is the "normalized config" and is produced by merging
`ConfigToml` with `ConfigOverrides`. Where `ConfigToml` contains a raw
`Option<Vec<SandboxPermission>>`, `Config` presents only the final
`SandboxPolicy`.
The changes to `core/src/exec.rs` and `core/src/linux.rs` merit extra
special attention to ensure we are faithfully mapping the
`SandboxPolicy` to the Seatbelt and Landlock configs, respectively.
Also, take note that `core/src/seatbelt_readonly_policy.sbpl` has been
renamed to `codex-rs/core/src/seatbelt_base_policy.sbpl` and that
`(allow file-read*)` has been removed from the `.sbpl` file as now this
is added to the policy in `core/src/exec.rs` when
`sandbox_policy.has_full_disk_read_access()` is `true`.
2025-04-29 15:01:16 -07:00
|
|
|
|
2025-05-07 17:38:28 -07:00
|
|
|
/// Provider to use from the model_providers map.
|
|
|
|
|
pub model_provider: Option<String>,
|
|
|
|
|
|
feat: show number of tokens remaining in UI (#1388)
When using the OpenAI Responses API, we now record the `usage` field for
a `"response.completed"` event, which includes metrics about the number
of tokens consumed. We also introduce `openai_model_info.rs`, which
includes current data about the most common OpenAI models available via
the API (specifically `context_window` and `max_output_tokens`). If
Codex does not recognize the model, you can set `model_context_window`
and `model_max_output_tokens` explicitly in `config.toml`.
We then introduce a new event type to `protocol.rs`, `TokenCount`,
which includes the `TokenUsage` for the most recent turn.
Finally, we update the TUI to record the running sum of tokens used so
the percentage of available context window remaining can be reported via
the placeholder text for the composer:

We could certainly get much fancier with this (such as reporting the
estimated cost of the conversation), but for now, we are just trying to
achieve feature parity with the TypeScript CLI.
Though arguably this improves upon the TypeScript CLI, as the TypeScript
CLI uses heuristics to estimate the number of tokens used rather than
using the `usage` information directly:
https://github.com/openai/codex/blob/296996d74e345b1b05d8c3451a06ace21c5ada96/codex-cli/src/utils/approximate-tokens-used.ts#L3-L16
Fixes https://github.com/openai/codex/issues/1242
2025-06-25 23:31:11 -07:00
|
|
|
/// Size of the context window for the model, in tokens.
|
|
|
|
|
pub model_context_window: Option<u64>,
|
|
|
|
|
|
|
|
|
|
/// Maximum number of output tokens.
|
|
|
|
|
pub model_max_output_tokens: Option<u64>,
|
|
|
|
|
|
2025-09-12 13:07:10 -07:00
|
|
|
/// Token usage threshold triggering auto-compaction of conversation history.
|
|
|
|
|
pub model_auto_compact_token_limit: Option<i64>,
|
|
|
|
|
|
fix: overhaul SandboxPolicy and config loading in Rust (#732)
Previous to this PR, `SandboxPolicy` was a bit difficult to work with:
https://github.com/openai/codex/blob/237f8a11e11fdcc793a09e787e48215676d9b95b/codex-rs/core/src/protocol.rs#L98-L108
Specifically:
* It was an `enum` and therefore options were mutually exclusive as
opposed to additive.
* It defined things in terms of what the agent _could not_ do as opposed
to what they _could_ do. This made things hard to support because we
would prefer to build up a sandbox config by starting with something
extremely restrictive and only granting permissions for things the user
has explicitly allowed.
This PR changes things substantially by redefining the policy in terms
of two concepts:
* A `SandboxPermission` enum that defines permissions that can be
granted to the agent/sandbox.
* A `SandboxPolicy` that internally stores a `Vec<SandboxPermission>`,
but externally exposes a simpler API that can be used to configure
Seatbelt/Landlock.
Previous to this PR, we supported a `--sandbox` flag that effectively
mapped to an enum value in `SandboxPolicy`. Though now that
`SandboxPolicy` is a wrapper around `Vec<SandboxPermission>`, the single
`--sandbox` flag no longer makes sense. While I could have turned it
into a flag that the user can specify multiple times, I think the
current values to use with such a flag are long and potentially messy,
so for the moment, I have dropped support for `--sandbox` altogether and
we can bring it back once we have figured out the naming thing.
Since `--sandbox` is gone, users now have to specify `--full-auto` to
get a sandbox that allows writes in `cwd`. Admittedly, there is no clean
way to specify the equivalent of `--full-auto` in your `config.toml`
right now, so we will have to revisit that, as well.
Because `Config` presents a `SandboxPolicy` field and `SandboxPolicy`
changed considerably, I had to overhaul how config loading works, as
well. There are now two distinct concepts, `ConfigToml` and `Config`:
* `ConfigToml` is the deserialization of `~/.codex/config.toml`. As one
might expect, every field is `Optional` and it is `#[derive(Deserialize,
Default)]`. Consistent use of `Optional` makes it clear what the user
has specified explicitly.
* `Config` is the "normalized config" and is produced by merging
`ConfigToml` with `ConfigOverrides`. Where `ConfigToml` contains a raw
`Option<Vec<SandboxPermission>>`, `Config` presents only the final
`SandboxPolicy`.
The changes to `core/src/exec.rs` and `core/src/linux.rs` merit extra
special attention to ensure we are faithfully mapping the
`SandboxPolicy` to the Seatbelt and Landlock configs, respectively.
Also, take note that `core/src/seatbelt_readonly_policy.sbpl` has been
renamed to `codex-rs/core/src/seatbelt_base_policy.sbpl` and that
`(allow file-read*)` has been removed from the `.sbpl` file as now this
is added to the policy in `core/src/exec.rs` when
`sandbox_policy.has_full_disk_read_access()` is `true`.
2025-04-29 15:01:16 -07:00
|
|
|
/// Default approval policy for executing commands.
|
|
|
|
|
pub approval_policy: Option<AskForApproval>,
|
|
|
|
|
|
feat: introduce support for shell_environment_policy in config.toml (#1061)
To date, when handling `shell` and `local_shell` tool calls, we were
spawning new processes using the environment inherited from the Codex
process itself. This means that the sensitive `OPENAI_API_KEY` that
Codex needs to talk to OpenAI models was made available to everything
run by `shell` and `local_shell`. While there are cases where that might
be useful, it does not seem like a good default.
This PR introduces a complex `shell_environment_policy` config option to
control the `env` used with these tool calls. It is inevitably a bit
complex so that it is possible to override individual components of the
policy so without having to restate the entire thing.
Details are in the updated `README.md` in this PR, but here is the
relevant bit that explains the individual fields of
`shell_environment_policy`:
| Field | Type | Default | Description |
| ------------------------- | -------------------------- | ------- |
-----------------------------------------------------------------------------------------------------------------------------------------------
|
| `inherit` | string | `core` | Starting template for the
environment:<br>`core` (`HOME`, `PATH`, `USER`, …), `all` (clone full
parent env), or `none` (start empty). |
| `ignore_default_excludes` | boolean | `false` | When `false`, Codex
removes any var whose **name** contains `KEY`, `SECRET`, or `TOKEN`
(case-insensitive) before other rules run. |
| `exclude` | array<string> | `[]` | Case-insensitive glob
patterns to drop after the default filter.<br>Examples: `"AWS_*"`,
`"AZURE_*"`. |
| `set` | table<string,string> | `{}` | Explicit key/value
overrides or additions – always win over inherited values. |
| `include_only` | array<string> | `[]` | If non-empty, a
whitelist of patterns; only variables that match _one_ pattern survive
the final step. (Generally used with `inherit = "all"`.) |
In particular, note that the default is `inherit = "core"`, so:
* if you have extra env variables that you want to inherit from the
parent process, use `inherit = "all"` and then specify `include_only`
* if you have extra env variables where you want to hardcode the values,
the default `inherit = "core"` will work fine, but then you need to
specify `set`
This configuration is not battle-tested, so we will probably still have
to play with it a bit. `core/src/exec_env.rs` has the critical business
logic as well as unit tests.
Though if nothing else, previous to this change:
```
$ cargo run --bin codex -- debug seatbelt -- printenv OPENAI_API_KEY
# ...prints OPENAI_API_KEY...
```
But after this change it does not print anything (as desired).
One final thing to call out about this PR is that the
`configure_command!` macro we use in `core/src/exec.rs` has to do some
complex logic with respect to how it builds up the `env` for the process
being spawned under Landlock/seccomp. Specifically, doing
`cmd.env_clear()` followed by `cmd.envs(&$env_map)` (which is arguably
the most intuitive way to do it) caused the Landlock unit tests to fail
because the processes spawned by the unit tests started failing in
unexpected ways! If we forgo `env_clear()` in favor of updating env vars
one at a time, the tests still pass. The comment in the code talks about
this a bit, and while I would like to investigate this more, I need to
move on for the moment, but I do plan to come back to it to fully
understand what is going on. For example, this suggests that we might
not be able to spawn a C program that calls `env_clear()`, which would
be...weird. We may still have to fiddle with our Landlock config if that
is the case.
2025-05-22 09:51:19 -07:00
|
|
|
#[serde(default)]
|
|
|
|
|
pub shell_environment_policy: ShellEnvironmentPolicyToml,
|
|
|
|
|
|
feat: add support for --sandbox flag (#1476)
On a high-level, we try to design `config.toml` so that you don't have
to "comment out a lot of stuff" when testing different options.
Previously, defining a sandbox policy was somewhat at odds with this
principle because you would define the policy as attributes of
`[sandbox]` like so:
```toml
[sandbox]
mode = "workspace-write"
writable_roots = [ "/tmp" ]
```
but if you wanted to temporarily change to a read-only sandbox, you
might feel compelled to modify your file to be:
```toml
[sandbox]
mode = "read-only"
# mode = "workspace-write"
# writable_roots = [ "/tmp" ]
```
Technically, commenting out `writable_roots` would not be strictly
necessary, as `mode = "read-only"` would ignore `writable_roots`, but
it's still a reasonable thing to do to keep things tidy.
Currently, the various values for `mode` do not support that many
attributes, so this is not that hard to maintain, but one could imagine
this becoming more complex in the future.
In this PR, we change Codex CLI so that it no longer recognizes
`[sandbox]`. Instead, it introduces a top-level option, `sandbox_mode`,
and `[sandbox_workspace_write]` is used to further configure the sandbox
when `sandbox_mode = "workspace-write"` is used:
```toml
sandbox_mode = "workspace-write"
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
```
This feels a bit more future-proof in that it is less tedious to
configure different sandboxes:
```toml
sandbox_mode = "workspace-write"
[sandbox_read_only]
# read-only options here...
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
[sandbox_danger_full_access]
# danger-full-access options here...
```
In this scheme, you never need to comment out the configuration for an
individual sandbox type: you only need to redefine `sandbox_mode`.
Relatedly, previous to this change, a user had to do `-c
sandbox.mode=read-only` to change the mode on the command line. With
this change, things are arguably a bit cleaner because the equivalent
option is `-c sandbox_mode=read-only` (and now `-c
sandbox_workspace_write=...` can be set separately).
Though more importantly, we introduce the `-s/--sandbox` option to the
CLI, which maps directly to `sandbox_mode` in `config.toml`, making
config override behavior easier to reason about. Moreover, as you can
see in the updates to the various Markdown files, it is much easier to
explain how to configure sandboxing when things like `--sandbox
read-only` can be used as an example.
Relatedly, this cleanup also made it straightforward to add support for
a `sandbox` option for Codex when used as an MCP server (see the changes
to `mcp-server/src/codex_tool_config.rs`).
Fixes https://github.com/openai/codex/issues/1248.
2025-07-07 22:31:30 -07:00
|
|
|
/// Sandbox mode to use.
|
|
|
|
|
pub sandbox_mode: Option<SandboxMode>,
|
|
|
|
|
|
|
|
|
|
/// Sandbox configuration to apply if `sandbox` is `WorkspaceWrite`.
|
2025-08-07 01:30:13 -07:00
|
|
|
pub sandbox_workspace_write: Option<SandboxWorkspaceWrite>,
|
fix: overhaul SandboxPolicy and config loading in Rust (#732)
Previous to this PR, `SandboxPolicy` was a bit difficult to work with:
https://github.com/openai/codex/blob/237f8a11e11fdcc793a09e787e48215676d9b95b/codex-rs/core/src/protocol.rs#L98-L108
Specifically:
* It was an `enum` and therefore options were mutually exclusive as
opposed to additive.
* It defined things in terms of what the agent _could not_ do as opposed
to what they _could_ do. This made things hard to support because we
would prefer to build up a sandbox config by starting with something
extremely restrictive and only granting permissions for things the user
as explicitly allowed.
This PR changes things substantially by redefining the policy in terms
of two concepts:
* A `SandboxPermission` enum that defines permissions that can be
granted to the agent/sandbox.
* A `SandboxPolicy` that internally stores a `Vec<SandboxPermission>`,
but externally exposes a simpler API that can be used to configure
Seatbelt/Landlock.
Previous to this PR, we supported a `--sandbox` flag that effectively
mapped to an enum value in `SandboxPolicy`. Though now that
`SandboxPolicy` is a wrapper around `Vec<SandboxPermission>`, the single
`--sandbox` flag no longer makes sense. While I could have turned it
into a flag that the user can specify multiple times, I think the
current values to use with such a flag are long and potentially messy,
so for the moment, I have dropped support for `--sandbox` altogether and
we can bring it back once we have figured out the naming thing.
Since `--sandbox` is gone, users now have to specify `--full-auto` to
get a sandbox that allows writes in `cwd`. Admittedly, there is no clean
way to specify the equivalent of `--full-auto` in your `config.toml`
right now, so we will have to revisit that, as well.
Because `Config` presents a `SandboxPolicy` field and `SandboxPolicy`
changed considerably, I had to overhaul how config loading works, as
well. There are now two distinct concepts, `ConfigToml` and `Config`:
* `ConfigToml` is the deserialization of `~/.codex/config.toml`. As one
might expect, every field is `Optional` and it is `#[derive(Deserialize,
Default)]`. Consistent use of `Optional` makes it clear what the user
has specified explicitly.
* `Config` is the "normalized config" and is produced by merging
`ConfigToml` with `ConfigOverrides`. Where `ConfigToml` contains a raw
`Option<Vec<SandboxPermission>>`, `Config` presents only the final
`SandboxPolicy`.
The changes to `core/src/exec.rs` and `core/src/linux.rs` merit extra
special attention to ensure we are faithfully mapping the
`SandboxPolicy` to the Seatbelt and Landlock configs, respectively.
Also, take note that `core/src/seatbelt_readonly_policy.sbpl` has been
renamed to `codex-rs/core/src/seatbelt_base_policy.sbpl` and that
`(allow file-read*)` has been removed from the `.sbpl` file as now this
is added to the policy in `core/src/exec.rs` when
`sandbox_policy.has_full_disk_read_access()` is `true`.
2025-04-29 15:01:16 -07:00
|
|
|
|
feat: configurable notifications in the Rust CLI (#793)
With this change, you can specify a program that will be executed to get
notified about events generated by Codex. The notification info will be
packaged as a JSON object. The supported notification types are defined
by the `UserNotification` enum introduced in this PR. Initially, it
contains only one variant, `AgentTurnComplete`:
```rust
pub(crate) enum UserNotification {
#[serde(rename_all = "kebab-case")]
AgentTurnComplete {
turn_id: String,
/// Messages that the user sent to the agent to initiate the turn.
input_messages: Vec<String>,
/// The last message sent by the assistant in the turn.
last_assistant_message: Option<String>,
},
}
```
This is intended to support the common case when a "turn" ends, which
often means it is now your chance to give Codex further instructions.
For example, I have the following in my `~/.codex/config.toml`:
```toml
notify = ["python3", "/Users/mbolin/.codex/notify.py"]
```
I created my own custom notifier script that calls out to
[terminal-notifier](https://github.com/julienXX/terminal-notifier) to
show a desktop push notification on macOS. Contents of `notify.py`:
```python
#!/usr/bin/env python3
import json
import subprocess
import sys
def main() -> int:
if len(sys.argv) != 2:
print("Usage: notify.py <NOTIFICATION_JSON>")
return 1
try:
notification = json.loads(sys.argv[1])
except json.JSONDecodeError:
return 1
match notification_type := notification.get("type"):
case "agent-turn-complete":
assistant_message = notification.get("last-assistant-message")
if assistant_message:
title = f"Codex: {assistant_message}"
else:
title = "Codex: Turn Complete!"
input_messages = notification.get("input_messages", [])
message = " ".join(input_messages)
title += message
case _:
print(f"not sending a push notification for: {notification_type}")
return 0
subprocess.check_output(
[
"terminal-notifier",
"-title",
title,
"-message",
message,
"-group",
"codex",
"-ignoreDnD",
"-activate",
"com.googlecode.iterm2",
]
)
return 0
if __name__ == "__main__":
sys.exit(main())
```
For reference, here are related PRs that tried to add this functionality
to the TypeScript version of the Codex CLI:
* https://github.com/openai/codex/pull/160
* https://github.com/openai/codex/pull/498
2025-05-02 19:48:13 -07:00
|
|
|
/// Optional external command to spawn for end-user notifications.
|
|
|
|
|
#[serde(default)]
|
|
|
|
|
pub notify: Option<Vec<String>>,
|
|
|
|
|
|
fix: overhaul SandboxPolicy and config loading in Rust (#732)
Previous to this PR, `SandboxPolicy` was a bit difficult to work with:
https://github.com/openai/codex/blob/237f8a11e11fdcc793a09e787e48215676d9b95b/codex-rs/core/src/protocol.rs#L98-L108
Specifically:
* It was an `enum` and therefore options were mutually exclusive as
opposed to additive.
* It defined things in terms of what the agent _could not_ do as opposed
to what they _could_ do. This made things hard to support because we
would prefer to build up a sandbox config by starting with something
extremely restrictive and only granting permissions for things the user
has explicitly allowed.
This PR changes things substantially by redefining the policy in terms
of two concepts:
* A `SandboxPermission` enum that defines permissions that can be
granted to the agent/sandbox.
* A `SandboxPolicy` that internally stores a `Vec<SandboxPermission>`,
but externally exposes a simpler API that can be used to configure
Seatbelt/Landlock.
Previous to this PR, we supported a `--sandbox` flag that effectively
mapped to an enum value in `SandboxPolicy`. Though now that
`SandboxPolicy` is a wrapper around `Vec<SandboxPermission>`, the single
`--sandbox` flag no longer makes sense. While I could have turned it
into a flag that the user can specify multiple times, I think the
current values to use with such a flag are long and potentially messy,
so for the moment, I have dropped support for `--sandbox` altogether and
we can bring it back once we have figured out the naming thing.
Since `--sandbox` is gone, users now have to specify `--full-auto` to
get a sandbox that allows writes in `cwd`. Admittedly, there is no clean
way to specify the equivalent of `--full-auto` in your `config.toml`
right now, so we will have to revisit that, as well.
Because `Config` presents a `SandboxPolicy` field and `SandboxPolicy`
changed considerably, I had to overhaul how config loading works, as
well. There are now two distinct concepts, `ConfigToml` and `Config`:
* `ConfigToml` is the deserialization of `~/.codex/config.toml`. As one
might expect, every field is `Optional` and it is `#[derive(Deserialize,
Default)]`. Consistent use of `Optional` makes it clear what the user
has specified explicitly.
* `Config` is the "normalized config" and is produced by merging
`ConfigToml` with `ConfigOverrides`. Where `ConfigToml` contains a raw
`Option<Vec<SandboxPermission>>`, `Config` presents only the final
`SandboxPolicy`.
The changes to `core/src/exec.rs` and `core/src/linux.rs` merit extra
special attention to ensure we are faithfully mapping the
`SandboxPolicy` to the Seatbelt and Landlock configs, respectively.
Also, take note that `core/src/seatbelt_readonly_policy.sbpl` has been
renamed to `codex-rs/core/src/seatbelt_base_policy.sbpl` and that
`(allow file-read*)` has been removed from the `.sbpl` file as now this
is added to the policy in `core/src/exec.rs` when
`sandbox_policy.has_full_disk_read_access()` is `true`.
2025-04-29 15:01:16 -07:00
|
|
|
/// System instructions.
|
|
|
|
|
pub instructions: Option<String>,
|
feat: support mcp_servers in config.toml (#829)
This adds initial support for MCP servers in the style of Claude Desktop
and Cursor. Note this PR is the bare minimum to get things working end
to end: all configured MCP servers are launched every time Codex is run,
there is no recovery for MCP servers that crash, etc.
(Also, I took some shortcuts to change some fields of `Session` to be
`pub(crate)`, which also means there are circular deps between
`codex.rs` and `mcp_tool_call.rs`, but I will clean that up in a
subsequent PR.)
`codex-rs/README.md` is updated as part of this PR to explain how to use
this feature. There is a bit of plumbing to route the new settings from
`Config` to the business logic in `codex.rs`. The most significant
chunks for new code are in `mcp_connection_manager.rs` (which defines
the `McpConnectionManager` struct) and `mcp_tool_call.rs`, which is
responsible for tool calls.
This PR also introduces new `McpToolCallBegin` and `McpToolCallEnd`
event types to the protocol, but does not add any handlers for them.
(See https://github.com/openai/codex/pull/836 for initial usage.)
To test, I added the following to my `~/.codex/config.toml`:
```toml
# Local build of https://github.com/hideya/mcp-server-weather-js
[mcp_servers.weather]
command = "/Users/mbolin/code/mcp-server-weather-js/dist/index.js"
args = []
```
And then I ran the following:
```
codex-rs$ cargo run --bin codex exec 'what is the weather in san francisco'
[2025-05-06T22:40:05] Task started: 1
[2025-05-06T22:40:18] Agent message: Here’s the latest National Weather Service forecast for San Francisco (downtown, near 37.77° N, 122.42° W):
This Afternoon (Tue):
• Sunny, high near 69 °F
• West-southwest wind around 12 mph
Tonight:
• Partly cloudy, low around 52 °F
• SW wind 7–10 mph
...
```
Note that Codex itself is not able to make network calls, so it would
not normally be able to get live weather information like this. However,
the weather MCP is [currently] not run under the Codex sandbox, so it is
able to hit `api.weather.gov` and fetch current weather information.
---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed
with [ReviewStack](https://reviewstack.dev/openai/codex/pull/829).
* #836
* __->__ #829
2025-05-06 15:47:59 -07:00
|
|
|
|
|
|
|
|
/// Definition for MCP servers that Codex can reach out to for tool calls.
|
|
|
|
|
#[serde(default)]
|
|
|
|
|
pub mcp_servers: HashMap<String, McpServerConfig>,
|
2025-05-07 17:38:28 -07:00
|
|
|
|
|
|
|
|
/// User-defined provider entries that extend/override the built-in list.
|
|
|
|
|
#[serde(default)]
|
|
|
|
|
pub model_providers: HashMap<String, ModelProviderInfo>,
|
2025-05-10 17:52:59 -07:00
|
|
|
|
|
|
|
|
/// Maximum number of bytes to include from an AGENTS.md project doc file.
|
|
|
|
|
pub project_doc_max_bytes: Option<usize>,
|
2025-05-13 16:52:52 -07:00
|
|
|
|
2025-10-01 11:19:59 -07:00
|
|
|
/// Ordered list of fallback filenames to look for when AGENTS.md is missing.
|
|
|
|
|
pub project_doc_fallback_filenames: Option<Vec<String>>,
|
|
|
|
|
|
2025-05-13 16:52:52 -07:00
|
|
|
/// Profile to use from the `profiles` map.
|
|
|
|
|
pub profile: Option<String>,
|
|
|
|
|
|
|
|
|
|
/// Named profiles to facilitate switching between different configurations.
|
|
|
|
|
#[serde(default)]
|
|
|
|
|
pub profiles: HashMap<String, ConfigProfile>,
|
feat: record messages from user in ~/.codex/history.jsonl (#939)
This is a large change to support a "history" feature like you would
expect in a shell like Bash.
History events are recorded in `$CODEX_HOME/history.jsonl`. Because it
is a JSONL file, it is straightforward to append new entries (as opposed
to the TypeScript file that uses `$CODEX_HOME/history.json`, so to be
valid JSON, each new entry entails rewriting the entire file). Because
it is possible for there to be multiple instances of Codex CLI writing
to `history.jsonl` at once, we use advisory file locking when working
with `history.jsonl` in `codex-rs/core/src/message_history.rs`.
Because we believe history is a sufficiently useful feature, we enable
it by default. Though to provide some safety, we set the file
permissions of `history.jsonl` to be `o600` so that other users on the
system cannot read the user's history. We do not yet support a default
list of `SENSITIVE_PATTERNS` as the TypeScript CLI does:
https://github.com/openai/codex/blob/3fdf9df1335ac9501e3fb0e61715359145711e8b/codex-cli/src/utils/storage/command-history.ts#L10-L17
We are going to take a more conservative approach to this list in the
Rust CLI. For example, while `/\b[A-Za-z0-9-_]{20,}\b/` might exclude
sensitive information like API tokens, it would also exclude valuable
information such as references to Git commits.
As noted in the updated documentation, users can opt-out of history by
adding the following to `config.toml`:
```toml
[history]
persistence = "none"
```
Because `history.jsonl` could, in theory, be quite large, we take a[n
arguably overly pedantic] approach in reading history entries into
memory. Specifically, we start by telling the client the current number
of entries in the history file (`history_entry_count`) as well as the
inode (`history_log_id`) of `history.jsonl` (see the new fields on
`SessionConfiguredEvent`).
The client is responsible for keeping new entries in memory to create a
"local history," but if the user hits up enough times to go "past" the
end of local history, then the client should use the new
`GetHistoryEntryRequest` in the protocol to fetch older entries.
Specifically, it should pass the `history_log_id` it was given
originally and work backwards from `history_entry_count`. (It should
really fetch history in batches rather than one-at-a-time, but that is
something we can improve upon in subsequent PRs.)
The motivation behind this crazy scheme is that it is designed to defend
against:
* The `history.jsonl` being truncated during the session such that the
index into the history is no longer consistent with what had been read
up to that point. We do not yet have logic to enforce a `max_bytes` for
`history.jsonl`, but once we do, we will aspire to implement it in a way
that should result in a new inode for the file on most systems.
* New items from concurrent Codex CLI sessions appending to the history.
Because, in absence of truncation, `history.jsonl` is an append-only
log, so long as the client reads backwards from `history_entry_count`,
it should always get a consistent view of history. (That said, it will
not be able to read _new_ commands from concurrent sessions, but perhaps
we will introduce a `/` command to reload latest history or something
down the road.)
Admittedly, my testing of this feature thus far has been fairly light. I
expect we will find bugs and introduce enhancements/fixes going forward.
2025-05-15 16:26:23 -07:00
|
|
|
|
|
|
|
|
/// Settings that govern if and what will be written to `~/.codex/history.jsonl`.
|
|
|
|
|
#[serde(default)]
|
|
|
|
|
pub history: Option<History>,
|
2025-05-16 11:33:08 -07:00
|
|
|
|
|
|
|
|
/// Optional URI-based file opener. If set, citations to files in the model
|
|
|
|
|
/// output will be hyperlinked using the specified URI scheme.
|
|
|
|
|
pub file_opener: Option<UriBasedFileOpener>,
|
2025-05-16 16:16:50 -07:00
|
|
|
|
|
|
|
|
/// Collection of settings that are specific to the TUI.
|
|
|
|
|
pub tui: Option<Tui>,
|
2025-05-30 23:14:56 -07:00
|
|
|
|
|
|
|
|
/// When set to `true`, `AgentReasoning` events will be hidden from the
|
|
|
|
|
/// UI/output. Defaults to `false`.
|
|
|
|
|
pub hide_agent_reasoning: Option<bool>,
|
feat: make reasoning effort/summaries configurable (#1199)
Previous to this PR, we always set `reasoning` when making a request
using the Responses API:
https://github.com/openai/codex/blob/d7245cbbc9d8ff5446da45e5951761103492476d/codex-rs/core/src/client.rs#L108-L111
Though if you tried to use the Rust CLI with `--model gpt-4.1`, this
would fail with:
```shell
"Unsupported parameter: 'reasoning.effort' is not supported with this model."
```
We take a cue from the TypeScript CLI, which does a check on the model
name:
https://github.com/openai/codex/blob/d7245cbbc9d8ff5446da45e5951761103492476d/codex-cli/src/utils/agent/agent-loop.ts#L786-L789
This PR does a similar check, though also adds support for the following
config options:
```
model_reasoning_effort = "low" | "medium" | "high" | "none"
model_reasoning_summary = "auto" | "concise" | "detailed" | "none"
```
This way, if you have a model whose name happens to start with `"o"` (or
`"codex"`?), you can set these to `"none"` to explicitly disable
reasoning, if necessary. (That said, it seems unlikely anyone would use
the Responses API with non-OpenAI models, but we provide an escape
hatch, anyway.)
This PR also updates both the TUI and `codex exec` to show `reasoning
effort` and `reasoning summaries` in the header.
2025-06-02 16:01:34 -07:00
|
|
|
|
2025-08-05 01:56:13 -07:00
|
|
|
/// When set to `true`, `AgentReasoningRawContentEvent` events will be shown in the UI/output.
|
|
|
|
|
/// Defaults to `false`.
|
|
|
|
|
pub show_raw_agent_reasoning: Option<bool>,
|
|
|
|
|
|
feat: make reasoning effort/summaries configurable (#1199)
Previous to this PR, we always set `reasoning` when making a request
using the Responses API:
https://github.com/openai/codex/blob/d7245cbbc9d8ff5446da45e5951761103492476d/codex-rs/core/src/client.rs#L108-L111
Though if you tried to use the Rust CLI with `--model gpt-4.1`, this
would fail with:
```shell
"Unsupported parameter: 'reasoning.effort' is not supported with this model."
```
We take a cue from the TypeScript CLI, which does a check on the model
name:
https://github.com/openai/codex/blob/d7245cbbc9d8ff5446da45e5951761103492476d/codex-cli/src/utils/agent/agent-loop.ts#L786-L789
This PR does a similar check, though also adds support for the following
config options:
```
model_reasoning_effort = "low" | "medium" | "high" | "none"
model_reasoning_summary = "auto" | "concise" | "detailed" | "none"
```
This way, if you have a model whose name happens to start with `"o"` (or
`"codex"`?), you can set these to `"none"` to explicitly disable
reasoning, if necessary. (That said, it seems unlikely anyone would use
the Responses API with non-OpenAI models, but we provide an escape
hatch, anyway.)
This PR also updates both the TUI and `codex exec` to show `reasoning
effort` and `reasoning summaries` in the header.
2025-06-02 16:01:34 -07:00
|
|
|
pub model_reasoning_effort: Option<ReasoningEffort>,
|
|
|
|
|
pub model_reasoning_summary: Option<ReasoningSummary>,
|
2025-08-22 17:12:10 +01:00
|
|
|
/// Optional verbosity control for GPT-5 models (Responses API `text.verbosity`).
|
|
|
|
|
pub model_verbosity: Option<Verbosity>,
|
2025-07-10 14:30:33 -07:00
|
|
|
|
|
|
|
|
/// Override to force-enable reasoning summaries for the configured model.
|
|
|
|
|
pub model_supports_reasoning_summaries: Option<bool>,
|
2025-07-11 13:30:11 -04:00
|
|
|
|
2025-09-04 11:00:01 -07:00
|
|
|
/// Override to force reasoning summary format for the configured model.
|
|
|
|
|
pub model_reasoning_summary_format: Option<ReasoningSummaryFormat>,
|
|
|
|
|
|
2025-07-11 13:30:11 -04:00
|
|
|
/// Base URL for requests to ChatGPT (as opposed to the OpenAI API).
|
|
|
|
|
pub chatgpt_base_url: Option<String>,
|
2025-07-18 17:04:04 -07:00
|
|
|
|
2025-07-22 09:42:22 -07:00
|
|
|
/// Experimental path to a file whose contents replace the built-in BASE_INSTRUCTIONS.
|
|
|
|
|
pub experimental_instructions_file: Option<PathBuf>,
|
2025-08-01 09:55:23 -07:00
|
|
|
|
2025-08-22 18:10:55 -07:00
|
|
|
pub experimental_use_exec_command_tool: Option<bool>,
|
Unified execution (#3288)
## Unified PTY-Based Exec Tool
Note: this requires to have this flag in the config:
`use_experimental_unified_exec_tool=true`
- Adds a PTY-backed interactive exec feature (“unified_exec”) with
session reuse via
session_id, bounded output (128 KiB), and timeout clamping (≤ 60 s).
- Protocol: introduces ResponseItem::UnifiedExec { session_id,
arguments, timeout_ms }.
- Tools: exposes unified_exec as a function tool (Responses API);
excluded from Chat
Completions payload while still supported in tool lists.
- Path handling: resolves commands via PATH (or explicit paths), with
UTF‑8/newline‑aware
truncation (truncate_middle).
- Tests: cover command parsing, path resolution, session
persistence/cleanup, multi‑session
isolation, timeouts, and truncation behavior.
2025-09-10 17:38:11 -07:00
|
|
|
pub experimental_use_unified_exec_tool: Option<bool>,
|
2025-09-26 10:13:37 -07:00
|
|
|
pub experimental_use_rmcp_client: Option<bool>,
|
2025-08-22 18:10:55 -07:00
|
|
|
|
2025-08-07 09:27:38 -07:00
|
|
|
pub projects: Option<HashMap<String, ProjectConfig>>,
|
2025-08-18 20:22:48 -07:00
|
|
|
|
2025-08-23 22:58:56 -07:00
|
|
|
/// Nested tools section for feature toggles
|
|
|
|
|
pub tools: Option<ToolsToml>,
|
2025-08-28 12:54:12 -07:00
|
|
|
|
|
|
|
|
/// When true, disables burst-paste detection for typed input entirely.
|
|
|
|
|
/// All characters are inserted as they are received, and no buffering
|
|
|
|
|
/// or placeholder replacement will occur for fast keypress bursts.
|
|
|
|
|
pub disable_paste_burst: Option<bool>,
|
OpenTelemetry events (#2103)
### Title
## otel
Codex can emit [OpenTelemetry](https://opentelemetry.io/) **log events**
that
describe each run: outbound API requests, streamed responses, user
input,
tool-approval decisions, and the result of every tool invocation. Export
is
**disabled by default** so local runs remain self-contained. Opt in by
adding an
`[otel]` table and choosing an exporter.
```toml
[otel]
environment = "staging" # defaults to "dev"
exporter = "none" # defaults to "none"; set to otlp-http or otlp-grpc to send events
log_user_prompt = false # defaults to false; redact prompt text unless explicitly enabled
```
Codex tags every exported event with `service.name = "codex-cli"`, the
CLI
version, and an `env` attribute so downstream collectors can distinguish
dev/staging/prod traffic. Only telemetry produced inside the
`codex_otel`
crate—the events listed below—is forwarded to the exporter.
### Event catalog
Every event shares a common set of metadata fields: `event.timestamp`,
`conversation.id`, `app.version`, `auth_mode` (when available),
`user.account_id` (when available), `terminal.type`, `model`, and
`slug`.
With OTEL enabled Codex emits the following event types (in addition to
the
metadata above):
- `codex.api_request`
- `cf_ray` (optional)
- `attempt`
- `duration_ms`
- `http.response.status_code` (optional)
- `error.message` (failures)
- `codex.sse_event`
- `event.kind`
- `duration_ms`
- `error.message` (failures)
- `input_token_count` (completion only)
- `output_token_count` (completion only)
- `cached_token_count` (completion only, optional)
- `reasoning_token_count` (completion only, optional)
- `tool_token_count` (completion only)
- `codex.user_prompt`
- `prompt_length`
- `prompt` (redacted unless `log_user_prompt = true`)
- `codex.tool_decision`
- `tool_name`
- `call_id`
- `decision` (`approved`, `approved_for_session`, `denied`, or `abort`)
- `source` (`config` or `user`)
- `codex.tool_result`
- `tool_name`
- `call_id`
- `arguments`
- `duration_ms` (execution time for the tool)
- `success` (`"true"` or `"false"`)
- `output`
### Choosing an exporter
Set `otel.exporter` to control where events go:
- `none` – leaves instrumentation active but skips exporting. This is
the
default.
- `otlp-http` – posts OTLP log records to an OTLP/HTTP collector.
Specify the
endpoint, protocol, and headers your collector expects:
```toml
[otel]
exporter = { otlp-http = {
endpoint = "https://otel.example.com/v1/logs",
protocol = "binary",
headers = { "x-otlp-api-key" = "${OTLP_TOKEN}" }
}}
```
- `otlp-grpc` – streams OTLP log records over gRPC. Provide the endpoint
and any
metadata headers:
```toml
[otel]
exporter = { otlp-grpc = {
endpoint = "https://otel.example.com:4317",
headers = { "x-otlp-meta" = "abc123" }
}}
```
If the exporter is `none` nothing is written anywhere; otherwise you
must run or point to your
own collector. All exporters run on a background batch worker that is
flushed on
shutdown.
If you build Codex from source the OTEL crate is still behind an `otel`
feature
flag; the official prebuilt binaries ship with the feature enabled. When
the
feature is disabled the telemetry hooks become no-ops so the CLI
continues to
function without the extra dependencies.
---------
Co-authored-by: Anton Panasenko <apanasenko@openai.com>
2025-09-29 19:30:55 +01:00
|
|
|
|
|
|
|
|
/// OTEL configuration.
|
|
|
|
|
pub otel: Option<crate::config_types::OtelConfigToml>,
|
2025-10-04 17:41:40 -07:00
|
|
|
|
|
|
|
|
/// Tracks whether the Windows onboarding screen has been acknowledged.
|
|
|
|
|
pub windows_wsl_setup_acknowledged: Option<bool>,
|
2025-08-07 09:27:38 -07:00
|
|
|
}
|
|
|
|
|
|
2025-09-04 16:26:41 -07:00
|
|
|
impl From<ConfigToml> for UserSavedConfig {
|
|
|
|
|
fn from(config_toml: ConfigToml) -> Self {
|
|
|
|
|
let profiles = config_toml
|
|
|
|
|
.profiles
|
|
|
|
|
.into_iter()
|
|
|
|
|
.map(|(k, v)| (k, v.into()))
|
|
|
|
|
.collect();
|
|
|
|
|
|
|
|
|
|
Self {
|
|
|
|
|
approval_policy: config_toml.approval_policy,
|
|
|
|
|
sandbox_mode: config_toml.sandbox_mode,
|
|
|
|
|
sandbox_settings: config_toml.sandbox_workspace_write.map(From::from),
|
|
|
|
|
model: config_toml.model,
|
|
|
|
|
model_reasoning_effort: config_toml.model_reasoning_effort,
|
|
|
|
|
model_reasoning_summary: config_toml.model_reasoning_summary,
|
|
|
|
|
model_verbosity: config_toml.model_verbosity,
|
|
|
|
|
tools: config_toml.tools.map(From::from),
|
|
|
|
|
profile: config_toml.profile,
|
|
|
|
|
profiles,
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2025-08-07 09:27:38 -07:00
|
|
|
#[derive(Deserialize, Debug, Clone, PartialEq, Eq)]
|
|
|
|
|
pub struct ProjectConfig {
|
|
|
|
|
pub trust_level: Option<String>,
|
fix: overhaul SandboxPolicy and config loading in Rust (#732)
Previous to this PR, `SandboxPolicy` was a bit difficult to work with:
https://github.com/openai/codex/blob/237f8a11e11fdcc793a09e787e48215676d9b95b/codex-rs/core/src/protocol.rs#L98-L108
Specifically:
* It was an `enum` and therefore options were mutually exclusive as
opposed to additive.
* It defined things in terms of what the agent _could not_ do as opposed
to what they _could_ do. This made things hard to support because we
would prefer to build up a sandbox config by starting with something
extremely restrictive and only granting permissions for things the user
as explicitly allowed.
This PR changes things substantially by redefining the policy in terms
of two concepts:
* A `SandboxPermission` enum that defines permissions that can be
granted to the agent/sandbox.
* A `SandboxPolicy` that internally stores a `Vec<SandboxPermission>`,
but externally exposes a simpler API that can be used to configure
Seatbelt/Landlock.
Previous to this PR, we supported a `--sandbox` flag that effectively
mapped to an enum value in `SandboxPolicy`. Though now that
`SandboxPolicy` is a wrapper around `Vec<SandboxPermission>`, the single
`--sandbox` flag no longer makes sense. While I could have turned it
into a flag that the user can specify multiple times, I think the
current values to use with such a flag are long and potentially messy,
so for the moment, I have dropped support for `--sandbox` altogether and
we can bring it back once we have figured out the naming thing.
Since `--sandbox` is gone, users now have to specify `--full-auto` to
get a sandbox that allows writes in `cwd`. Admittedly, there is no clean
way to specify the equivalent of `--full-auto` in your `config.toml`
right now, so we will have to revisit that, as well.
Because `Config` presents a `SandboxPolicy` field and `SandboxPolicy`
changed considerably, I had to overhaul how config loading works, as
well. There are now two distinct concepts, `ConfigToml` and `Config`:
* `ConfigToml` is the deserialization of `~/.codex/config.toml`. As one
might expect, every field is `Optional` and it is `#[derive(Deserialize,
Default)]`. Consistent use of `Optional` makes it clear what the user
has specified explicitly.
* `Config` is the "normalized config" and is produced by merging
`ConfigToml` with `ConfigOverrides`. Where `ConfigToml` contains a raw
`Option<Vec<SandboxPermission>>`, `Config` presents only the final
`SandboxPolicy`.
The changes to `core/src/exec.rs` and `core/src/linux.rs` merit extra
special attention to ensure we are faithfully mapping the
`SandboxPolicy` to the Seatbelt and Landlock configs, respectively.
Also, take note that `core/src/seatbelt_readonly_policy.sbpl` has been
renamed to `codex-rs/core/src/seatbelt_base_policy.sbpl` and that
`(allow file-read*)` has been removed from the `.sbpl` file as now this
is added to the policy in `core/src/exec.rs` when
`sandbox_policy.has_full_disk_read_access()` is `true`.
2025-04-29 15:01:16 -07:00
|
|
|
}
|
|
|
|
|
|
2025-09-11 23:44:17 -07:00
|
|
|
#[derive(Deserialize, Debug, Clone, Default, PartialEq)]
|
2025-08-23 22:58:56 -07:00
|
|
|
pub struct ToolsToml {
|
|
|
|
|
#[serde(default, alias = "web_search_request")]
|
|
|
|
|
pub web_search: Option<bool>,
|
2025-08-27 17:41:23 -07:00
|
|
|
|
|
|
|
|
/// Enable the `view_image` tool that lets the agent attach local images.
|
|
|
|
|
#[serde(default)]
|
|
|
|
|
pub view_image: Option<bool>,
|
2025-08-23 22:58:56 -07:00
|
|
|
}
|
|
|
|
|
|
2025-09-04 16:26:41 -07:00
|
|
|
impl From<ToolsToml> for Tools {
|
|
|
|
|
fn from(tools_toml: ToolsToml) -> Self {
|
|
|
|
|
Self {
|
|
|
|
|
web_search: tools_toml.web_search,
|
|
|
|
|
view_image: tools_toml.view_image,
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
feat: add support for --sandbox flag (#1476)
On a high-level, we try to design `config.toml` so that you don't have
to "comment out a lot of stuff" when testing different options.
Previously, defining a sandbox policy was somewhat at odds with this
principle because you would define the policy as attributes of
`[sandbox]` like so:
```toml
[sandbox]
mode = "workspace-write"
writable_roots = [ "/tmp" ]
```
but if you wanted to temporarily change to a read-only sandbox, you
might feel compelled to modify your file to be:
```toml
[sandbox]
mode = "read-only"
# mode = "workspace-write"
# writable_roots = [ "/tmp" ]
```
Technically, commenting out `writable_roots` would not be strictly
necessary, as `mode = "read-only"` would ignore `writable_roots`, but
it's still a reasonable thing to do to keep things tidy.
Currently, the various values for `mode` do not support that many
attributes, so this is not that hard to maintain, but one could imagine
this becoming more complex in the future.
In this PR, we change Codex CLI so that it no longer recognizes
`[sandbox]`. Instead, it introduces a top-level option, `sandbox_mode`,
and `[sandbox_workspace_write]` is used to further configure the sandbox
when `sandbox_mode = "workspace-write"` is used:
```toml
sandbox_mode = "workspace-write"
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
```
This feels a bit more future-proof in that it is less tedious to
configure different sandboxes:
```toml
sandbox_mode = "workspace-write"
[sandbox_read_only]
# read-only options here...
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
[sandbox_danger_full_access]
# danger-full-access options here...
```
In this scheme, you never need to comment out the configuration for an
individual sandbox type: you only need to redefine `sandbox_mode`.
Relatedly, previous to this change, a user had to do `-c
sandbox.mode=read-only` to change the mode on the command line. With
this change, things are arguably a bit cleaner because the equivalent
option is `-c sandbox_mode=read-only` (and now `-c
sandbox_workspace_write=...` can be set separately).
Though more importantly, we introduce the `-s/--sandbox` option to the
CLI, which maps directly to `sandbox_mode` in `config.toml`, making
config override behavior easier to reason about. Moreover, as you can
see in the updates to the various Markdown files, it is much easier to
explain how to configure sandboxing when things like `--sandbox
read-only` can be used as an example.
Relatedly, this cleanup also made it straightforward to add support for
a `sandbox` option for Codex when used as an MCP server (see the changes
to `mcp-server/src/codex_tool_config.rs`).
Fixes https://github.com/openai/codex/issues/1248.
2025-07-07 22:31:30 -07:00
|
|
|
impl ConfigToml {
|
|
|
|
|
/// Derive the effective sandbox policy from the configuration.
|
|
|
|
|
fn derive_sandbox_policy(&self, sandbox_mode_override: Option<SandboxMode>) -> SandboxPolicy {
|
|
|
|
|
let resolved_sandbox_mode = sandbox_mode_override
|
|
|
|
|
.or(self.sandbox_mode)
|
|
|
|
|
.unwrap_or_default();
|
|
|
|
|
match resolved_sandbox_mode {
|
|
|
|
|
SandboxMode::ReadOnly => SandboxPolicy::new_read_only_policy(),
|
|
|
|
|
SandboxMode::WorkspaceWrite => match self.sandbox_workspace_write.as_ref() {
|
2025-08-07 01:30:13 -07:00
|
|
|
Some(SandboxWorkspaceWrite {
|
2025-08-07 00:17:00 -07:00
|
|
|
writable_roots,
|
|
|
|
|
network_access,
|
|
|
|
|
exclude_tmpdir_env_var,
|
|
|
|
|
exclude_slash_tmp,
|
|
|
|
|
}) => SandboxPolicy::WorkspaceWrite {
|
|
|
|
|
writable_roots: writable_roots.clone(),
|
|
|
|
|
network_access: *network_access,
|
|
|
|
|
exclude_tmpdir_env_var: *exclude_tmpdir_env_var,
|
|
|
|
|
exclude_slash_tmp: *exclude_slash_tmp,
|
feat: add support for --sandbox flag (#1476)
On a high-level, we try to design `config.toml` so that you don't have
to "comment out a lot of stuff" when testing different options.
Previously, defining a sandbox policy was somewhat at odds with this
principle because you would define the policy as attributes of
`[sandbox]` like so:
```toml
[sandbox]
mode = "workspace-write"
writable_roots = [ "/tmp" ]
```
but if you wanted to temporarily change to a read-only sandbox, you
might feel compelled to modify your file to be:
```toml
[sandbox]
mode = "read-only"
# mode = "workspace-write"
# writable_roots = [ "/tmp" ]
```
Technically, commenting out `writable_roots` would not be strictly
necessary, as `mode = "read-only"` would ignore `writable_roots`, but
it's still a reasonable thing to do to keep things tidy.
Currently, the various values for `mode` do not support that many
attributes, so this is not that hard to maintain, but one could imagine
this becoming more complex in the future.
In this PR, we change Codex CLI so that it no longer recognizes
`[sandbox]`. Instead, it introduces a top-level option, `sandbox_mode`,
and `[sandbox_workspace_write]` is used to further configure the sandbox
when when `sandbox_mode = "workspace-write"` is used:
```toml
sandbox_mode = "workspace-write"
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
```
This feels a bit more future-proof in that it is less tedious to
configure different sandboxes:
```toml
sandbox_mode = "workspace-write"
[sandbox_read_only]
# read-only options here...
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
[sandbox_danger_full_access]
# danger-full-access options here...
```
In this scheme, you never need to comment out the configuration for an
individual sandbox type: you only need to redefine `sandbox_mode`.
Relatedly, previous to this change, a user had to do `-c
sandbox.mode=read-only` to change the mode on the command line. With
this change, things are arguably a bit cleaner because the equivalent
option is `-c sandbox_mode=read-only` (and now `-c
sandbox_workspace_write=...` can be set separately).
Though more importantly, we introduce the `-s/--sandbox` option to the
CLI, which maps directly to `sandbox_mode` in `config.toml`, making
config override behavior easier to reason about. Moreover, as you can
see in the updates to the various Markdown files, it is much easier to
explain how to configure sandboxing when things like `--sandbox
read-only` can be used as an example.
Relatedly, this cleanup also made it straightforward to add support for
a `sandbox` option for Codex when used as an MCP server (see the changes
to `mcp-server/src/codex_tool_config.rs`).
Fixes https://github.com/openai/codex/issues/1248.
2025-07-07 22:31:30 -07:00
|
|
|
},
|
|
|
|
|
None => SandboxPolicy::new_workspace_write_policy(),
|
|
|
|
|
},
|
|
|
|
|
SandboxMode::DangerFullAccess => SandboxPolicy::DangerFullAccess,
|
|
|
|
|
}
|
|
|
|
|
}
|
2025-08-07 09:27:38 -07:00
|
|
|
|
|
|
|
|
pub fn is_cwd_trusted(&self, resolved_cwd: &Path) -> bool {
|
|
|
|
|
let projects = self.projects.clone().unwrap_or_default();
|
|
|
|
|
|
2025-08-22 13:54:51 -07:00
|
|
|
let is_path_trusted = |path: &Path| {
|
|
|
|
|
let path_str = path.to_string_lossy().to_string();
|
|
|
|
|
projects
|
|
|
|
|
.get(&path_str)
|
|
|
|
|
.map(|p| p.trust_level.as_deref() == Some("trusted"))
|
|
|
|
|
.unwrap_or(false)
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// Fast path: exact cwd match
|
|
|
|
|
if is_path_trusted(resolved_cwd) {
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// If cwd lives inside a git worktree, check whether the root git project
|
|
|
|
|
// (the primary repository working directory) is trusted. This lets
|
|
|
|
|
// worktrees inherit trust from the main project.
|
|
|
|
|
if let Some(root_project) = resolve_root_git_project_for_trust(resolved_cwd) {
|
|
|
|
|
return is_path_trusted(&root_project);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
false
|
2025-08-07 09:27:38 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn get_config_profile(
|
|
|
|
|
&self,
|
|
|
|
|
override_profile: Option<String>,
|
|
|
|
|
) -> Result<ConfigProfile, std::io::Error> {
|
|
|
|
|
let profile = override_profile.or_else(|| self.profile.clone());
|
|
|
|
|
|
|
|
|
|
match profile {
|
|
|
|
|
Some(key) => {
|
|
|
|
|
if let Some(profile) = self.profiles.get(key.as_str()) {
|
|
|
|
|
return Ok(profile.clone());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Err(std::io::Error::new(
|
|
|
|
|
std::io::ErrorKind::NotFound,
|
|
|
|
|
format!("config profile `{key}` not found"),
|
|
|
|
|
))
|
|
|
|
|
}
|
|
|
|
|
None => Ok(ConfigProfile::default()),
|
|
|
|
|
}
|
|
|
|
|
}
|
feat: add support for --sandbox flag (#1476)
On a high-level, we try to design `config.toml` so that you don't have
to "comment out a lot of stuff" when testing different options.
Previously, defining a sandbox policy was somewhat at odds with this
principle because you would define the policy as attributes of
`[sandbox]` like so:
```toml
[sandbox]
mode = "workspace-write"
writable_roots = [ "/tmp" ]
```
but if you wanted to temporarily change to a read-only sandbox, you
might feel compelled to modify your file to be:
```toml
[sandbox]
mode = "read-only"
# mode = "workspace-write"
# writable_roots = [ "/tmp" ]
```
Technically, commenting out `writable_roots` would not be strictly
necessary, as `mode = "read-only"` would ignore `writable_roots`, but
it's still a reasonable thing to do to keep things tidy.
Currently, the various values for `mode` do not support that many
attributes, so this is not that hard to maintain, but one could imagine
this becoming more complex in the future.
In this PR, we change Codex CLI so that it no longer recognizes
`[sandbox]`. Instead, it introduces a top-level option, `sandbox_mode`,
and `[sandbox_workspace_write]` is used to further configure the sandbox
when when `sandbox_mode = "workspace-write"` is used:
```toml
sandbox_mode = "workspace-write"
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
```
This feels a bit more future-proof in that it is less tedious to
configure different sandboxes:
```toml
sandbox_mode = "workspace-write"
[sandbox_read_only]
# read-only options here...
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
[sandbox_danger_full_access]
# danger-full-access options here...
```
In this scheme, you never need to comment out the configuration for an
individual sandbox type: you only need to redefine `sandbox_mode`.
Relatedly, previous to this change, a user had to do `-c
sandbox.mode=read-only` to change the mode on the command line. With
this change, things are arguably a bit cleaner because the equivalent
option is `-c sandbox_mode=read-only` (and now `-c
sandbox_workspace_write=...` can be set separately).
Though more importantly, we introduce the `-s/--sandbox` option to the
CLI, which maps directly to `sandbox_mode` in `config.toml`, making
config override behavior easier to reason about. Moreover, as you can
see in the updates to the various Markdown files, it is much easier to
explain how to configure sandboxing when things like `--sandbox
read-only` can be used as an example.
Relatedly, this cleanup also made it straightforward to add support for
a `sandbox` option for Codex when used as an MCP server (see the changes
to `mcp-server/src/codex_tool_config.rs`).
Fixes https://github.com/openai/codex/issues/1248.
2025-07-07 22:31:30 -07:00
|
|
|
}
|
|
|
|
|
|
2025-04-27 21:47:50 -07:00
|
|
|
/// Optional overrides for user configuration (e.g., from CLI flags).
|
|
|
|
|
#[derive(Default, Debug, Clone)]
|
|
|
|
|
pub struct ConfigOverrides {
|
|
|
|
|
pub model: Option<String>,
|
Review Mode (Core) (#3401)
## 📝 Review Mode -- Core
This PR introduces the Core implementation for Review mode:
- New op `Op::Review { prompt: String }:` spawns a child review task
with isolated context, a review‑specific system prompt, and a
`Config.review_model`.
- `EnteredReviewMode`: emitted when the child review session starts.
Every event from this point onwards reflects the review session.
- `ExitedReviewMode(Option<ReviewOutputEvent>)`: emitted when the review
finishes or is interrupted, with optional structured findings:
```json
{
"findings": [
{
"title": "<≤ 80 chars, imperative>",
"body": "<valid Markdown explaining *why* this is a problem; cite files/lines/functions>",
"confidence_score": <float 0.0-1.0>,
"priority": <int 0-3>,
"code_location": {
"absolute_file_path": "<file path>",
"line_range": {"start": <int>, "end": <int>}
}
}
],
"overall_correctness": "patch is correct" | "patch is incorrect",
"overall_explanation": "<1-3 sentence explanation justifying the overall_correctness verdict>",
"overall_confidence_score": <float 0.0-1.0>
}
```
## Questions
### Why separate out its own message history?
We want the review thread to match the training of our review models as
much as possible -- that means using a custom prompt, removing user
instructions, and starting a clean chat history.
We also want to make sure the review thread doesn't leak into the parent
thread.
### Why do this as a mode, vs. sub-agents?
1. We want review to be a synchronous task, so it's fine for now to do a
bespoke implementation.
2. We're still unclear about the final structure for sub-agents. We'd
prefer to land this quickly and then refactor into sub-agents without
rushing that implementation.
2025-09-12 16:25:10 -07:00
|
|
|
pub review_model: Option<String>,
|
2025-05-04 10:57:12 -07:00
|
|
|
pub cwd: Option<PathBuf>,
|
2025-04-27 21:47:50 -07:00
|
|
|
pub approval_policy: Option<AskForApproval>,
|
feat: add support for --sandbox flag (#1476)
On a high-level, we try to design `config.toml` so that you don't have
to "comment out a lot of stuff" when testing different options.
Previously, defining a sandbox policy was somewhat at odds with this
principle because you would define the policy as attributes of
`[sandbox]` like so:
```toml
[sandbox]
mode = "workspace-write"
writable_roots = [ "/tmp" ]
```
but if you wanted to temporarily change to a read-only sandbox, you
might feel compelled to modify your file to be:
```toml
[sandbox]
mode = "read-only"
# mode = "workspace-write"
# writable_roots = [ "/tmp" ]
```
Technically, commenting out `writable_roots` would not be strictly
necessary, as `mode = "read-only"` would ignore `writable_roots`, but
it's still a reasonable thing to do to keep things tidy.
Currently, the various values for `mode` do not support that many
attributes, so this is not that hard to maintain, but one could imagine
this becoming more complex in the future.
In this PR, we change Codex CLI so that it no longer recognizes
`[sandbox]`. Instead, it introduces a top-level option, `sandbox_mode`,
and `[sandbox_workspace_write]` is used to further configure the sandbox
when `sandbox_mode = "workspace-write"` is used:
```toml
sandbox_mode = "workspace-write"
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
```
This feels a bit more future-proof in that it is less tedious to
configure different sandboxes:
```toml
sandbox_mode = "workspace-write"
[sandbox_read_only]
# read-only options here...
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
[sandbox_danger_full_access]
# danger-full-access options here...
```
In this scheme, you never need to comment out the configuration for an
individual sandbox type: you only need to redefine `sandbox_mode`.
Relatedly, previous to this change, a user had to do `-c
sandbox.mode=read-only` to change the mode on the command line. With
this change, things are arguably a bit cleaner because the equivalent
option is `-c sandbox_mode=read-only` (and now `-c
sandbox_workspace_write=...` can be set separately).
Though more importantly, we introduce the `-s/--sandbox` option to the
CLI, which maps directly to `sandbox_mode` in `config.toml`, making
config override behavior easier to reason about. Moreover, as you can
see in the updates to the various Markdown files, it is much easier to
explain how to configure sandboxing when things like `--sandbox
read-only` can be used as an example.
Relatedly, this cleanup also made it straightforward to add support for
a `sandbox` option for Codex when used as an MCP server (see the changes
to `mcp-server/src/codex_tool_config.rs`).
Fixes https://github.com/openai/codex/issues/1248.
2025-07-07 22:31:30 -07:00
|
|
|
pub sandbox_mode: Option<SandboxMode>,
|
2025-05-13 16:52:52 -07:00
|
|
|
pub model_provider: Option<String>,
|
|
|
|
|
pub config_profile: Option<String>,
|
2025-05-22 21:52:28 -07:00
|
|
|
pub codex_linux_sandbox_exe: Option<PathBuf>,
|
2025-07-22 09:42:22 -07:00
|
|
|
pub base_instructions: Option<String>,
|
2025-07-29 11:22:02 -07:00
|
|
|
pub include_plan_tool: Option<bool>,
|
2025-08-15 11:55:53 -04:00
|
|
|
pub include_apply_patch_tool: Option<bool>,
|
2025-08-27 17:41:23 -07:00
|
|
|
pub include_view_image_tool: Option<bool>,
|
2025-08-05 14:42:49 -07:00
|
|
|
pub show_raw_agent_reasoning: Option<bool>,
|
2025-08-23 22:58:56 -07:00
|
|
|
pub tools_web_search_request: Option<bool>,
|
2025-04-27 21:47:50 -07:00
|
|
|
}
|
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
As stated in `codex-rs/README.md`:
Today, Codex CLI is written in TypeScript and requires Node.js 22+ to
run it. For a number of users, this runtime requirement inhibits
adoption: they would be better served by a standalone executable. As
maintainers, we want Codex to run efficiently in a wide range of
environments with minimal overhead. We also want to take advantage of
operating system-specific APIs to provide better sandboxing, where
possible.
To that end, we are moving forward with a Rust implementation of Codex
CLI contained in this folder, which has the following benefits:
- The CLI compiles to small, standalone, platform-specific binaries.
- Can make direct, native calls to
[seccomp](https://man7.org/linux/man-pages/man2/seccomp.2.html) and
[landlock](https://man7.org/linux/man-pages/man7/landlock.7.html) in
order to support sandboxing on Linux.
- No runtime garbage collection, resulting in lower memory consumption
and better, more predictable performance.
Currently, the Rust implementation is materially behind the TypeScript
implementation in functionality, so continue to use the TypeScript
implementation for the time being. We will publish native executables via
GitHub Releases as soon as we feel the Rust version is usable.
2025-04-24 13:31:40 -07:00
|
|
|
|
2025-04-27 21:47:50 -07:00
|
|
|
impl Config {
|
2025-05-15 00:30:13 -07:00
|
|
|
/// Meant to be used exclusively for tests: `load_with_overrides()` should
|
|
|
|
|
/// be used in all other cases.
|
|
|
|
|
pub fn load_from_base_config_with_overrides(
|
2025-05-07 17:38:28 -07:00
|
|
|
cfg: ConfigToml,
|
|
|
|
|
overrides: ConfigOverrides,
|
2025-05-15 00:30:13 -07:00
|
|
|
codex_home: PathBuf,
|
2025-05-07 17:38:28 -07:00
|
|
|
) -> std::io::Result<Self> {
|
2025-07-22 09:42:22 -07:00
|
|
|
let user_instructions = Self::load_instructions(Some(&codex_home));
|
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
As stated in `codex-rs/README.md`:
Today, Codex CLI is written in TypeScript and requires Node.js 22+ to
run it. For a number of users, this runtime requirement inhibits
adoption: they would be better served by a standalone executable. As
maintainers, we want Codex to run efficiently in a wide range of
environments with minimal overhead. We also want to take advantage of
operating system-specific APIs to provide better sandboxing, where
possible.
To that end, we are moving forward with a Rust implementation of Codex
CLI contained in this folder, which has the following benefits:
- The CLI compiles to small, standalone, platform-specific binaries.
- Can make direct, native calls to
[seccomp](https://man7.org/linux/man-pages/man2/seccomp.2.html) and
[landlock](https://man7.org/linux/man-pages/man7/landlock.7.html) in
order to support sandboxing on Linux.
- No runtime garbage collection, resulting in lower memory consumption
and better, more predictable performance.
Currently, the Rust implementation is materially behind the TypeScript
implementation in functionality, so continue to use the TypeScript
implementation for the time being. We will publish native executables via
GitHub Releases as soon as we feel the Rust version is usable.
2025-04-24 13:31:40 -07:00
|
|
|
|
2025-04-27 21:47:50 -07:00
|
|
|
// Destructure ConfigOverrides fully to ensure all overrides are applied.
|
|
|
|
|
let ConfigOverrides {
|
|
|
|
|
model,
|
Review Mode (Core) (#3401)
## 📝 Review Mode -- Core
This PR introduces the Core implementation for Review mode:
- New op `Op::Review { prompt: String }:` spawns a child review task
with isolated context, a review‑specific system prompt, and a
`Config.review_model`.
- `EnteredReviewMode`: emitted when the child review session starts.
Every event from this point onwards reflects the review session.
- `ExitedReviewMode(Option<ReviewOutputEvent>)`: emitted when the review
finishes or is interrupted, with optional structured findings:
```json
{
"findings": [
{
"title": "<≤ 80 chars, imperative>",
"body": "<valid Markdown explaining *why* this is a problem; cite files/lines/functions>",
"confidence_score": <float 0.0-1.0>,
"priority": <int 0-3>,
"code_location": {
"absolute_file_path": "<file path>",
"line_range": {"start": <int>, "end": <int>}
}
}
],
"overall_correctness": "patch is correct" | "patch is incorrect",
"overall_explanation": "<1-3 sentence explanation justifying the overall_correctness verdict>",
"overall_confidence_score": <float 0.0-1.0>
}
```
## Questions
### Why separate out its own message history?
We want the review thread to match the training of our review models as
much as possible -- that means using a custom prompt, removing user
instructions, and starting a clean chat history.
We also want to make sure the review thread doesn't leak into the parent
thread.
### Why do this as a mode, vs. sub-agents?
1. We want review to be a synchronous task, so it's fine for now to do a
bespoke implementation.
2. We're still unclear about the final structure for sub-agents. We'd
prefer to land this quickly and then refactor into sub-agents without
rushing that implementation.
2025-09-12 16:25:10 -07:00
|
|
|
review_model: override_review_model,
|
2025-05-04 10:57:12 -07:00
|
|
|
cwd,
|
2025-04-27 21:47:50 -07:00
|
|
|
approval_policy,
|
feat: add support for --sandbox flag (#1476)
On a high-level, we try to design `config.toml` so that you don't have
to "comment out a lot of stuff" when testing different options.
Previously, defining a sandbox policy was somewhat at odds with this
principle because you would define the policy as attributes of
`[sandbox]` like so:
```toml
[sandbox]
mode = "workspace-write"
writable_roots = [ "/tmp" ]
```
but if you wanted to temporarily change to a read-only sandbox, you
might feel compelled to modify your file to be:
```toml
[sandbox]
mode = "read-only"
# mode = "workspace-write"
# writable_roots = [ "/tmp" ]
```
Technically, commenting out `writable_roots` would not be strictly
necessary, as `mode = "read-only"` would ignore `writable_roots`, but
it's still a reasonable thing to do to keep things tidy.
Currently, the various values for `mode` do not support that many
attributes, so this is not that hard to maintain, but one could imagine
this becoming more complex in the future.
In this PR, we change Codex CLI so that it no longer recognizes
`[sandbox]`. Instead, it introduces a top-level option, `sandbox_mode`,
and `[sandbox_workspace_write]` is used to further configure the sandbox
when `sandbox_mode = "workspace-write"` is used:
```toml
sandbox_mode = "workspace-write"
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
```
This feels a bit more future-proof in that it is less tedious to
configure different sandboxes:
```toml
sandbox_mode = "workspace-write"
[sandbox_read_only]
# read-only options here...
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
[sandbox_danger_full_access]
# danger-full-access options here...
```
In this scheme, you never need to comment out the configuration for an
individual sandbox type: you only need to redefine `sandbox_mode`.
Relatedly, previous to this change, a user had to do `-c
sandbox.mode=read-only` to change the mode on the command line. With
this change, things are arguably a bit cleaner because the equivalent
option is `-c sandbox_mode=read-only` (and now `-c
sandbox_workspace_write=...` can be set separately).
Though more importantly, we introduce the `-s/--sandbox` option to the
CLI, which maps directly to `sandbox_mode` in `config.toml`, making
config override behavior easier to reason about. Moreover, as you can
see in the updates to the various Markdown files, it is much easier to
explain how to configure sandboxing when things like `--sandbox
read-only` can be used as an example.
Relatedly, this cleanup also made it straightforward to add support for
a `sandbox` option for Codex when used as an MCP server (see the changes
to `mcp-server/src/codex_tool_config.rs`).
Fixes https://github.com/openai/codex/issues/1248.
2025-07-07 22:31:30 -07:00
|
|
|
sandbox_mode,
|
2025-05-13 16:52:52 -07:00
|
|
|
model_provider,
|
|
|
|
|
config_profile: config_profile_key,
|
2025-05-22 21:52:28 -07:00
|
|
|
codex_linux_sandbox_exe,
|
2025-07-22 09:42:22 -07:00
|
|
|
base_instructions,
|
2025-07-29 11:22:02 -07:00
|
|
|
include_plan_tool,
|
2025-08-15 11:55:53 -04:00
|
|
|
include_apply_patch_tool,
|
2025-08-27 17:41:23 -07:00
|
|
|
include_view_image_tool,
|
2025-08-05 14:42:49 -07:00
|
|
|
show_raw_agent_reasoning,
|
2025-08-23 22:58:56 -07:00
|
|
|
tools_web_search_request: override_tools_web_search_request,
|
2025-04-27 21:47:50 -07:00
|
|
|
} = overrides;
|
|
|
|
|
|
2025-09-10 13:53:46 -07:00
|
|
|
let active_profile_name = config_profile_key
|
|
|
|
|
.as_ref()
|
|
|
|
|
.or(cfg.profile.as_ref())
|
|
|
|
|
.cloned();
|
|
|
|
|
let config_profile = match active_profile_name.as_ref() {
|
2025-05-13 16:52:52 -07:00
|
|
|
Some(key) => cfg
|
|
|
|
|
.profiles
|
feat: add support for --sandbox flag (#1476)
On a high-level, we try to design `config.toml` so that you don't have
to "comment out a lot of stuff" when testing different options.
Previously, defining a sandbox policy was somewhat at odds with this
principle because you would define the policy as attributes of
`[sandbox]` like so:
```toml
[sandbox]
mode = "workspace-write"
writable_roots = [ "/tmp" ]
```
but if you wanted to temporarily change to a read-only sandbox, you
might feel compelled to modify your file to be:
```toml
[sandbox]
mode = "read-only"
# mode = "workspace-write"
# writable_roots = [ "/tmp" ]
```
Technically, commenting out `writable_roots` would not be strictly
necessary, as `mode = "read-only"` would ignore `writable_roots`, but
it's still a reasonable thing to do to keep things tidy.
Currently, the various values for `mode` do not support that many
attributes, so this is not that hard to maintain, but one could imagine
this becoming more complex in the future.
In this PR, we change Codex CLI so that it no longer recognizes
`[sandbox]`. Instead, it introduces a top-level option, `sandbox_mode`,
and `[sandbox_workspace_write]` is used to further configure the sandbox
when `sandbox_mode = "workspace-write"` is used:
```toml
sandbox_mode = "workspace-write"
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
```
This feels a bit more future-proof in that it is less tedious to
configure different sandboxes:
```toml
sandbox_mode = "workspace-write"
[sandbox_read_only]
# read-only options here...
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
[sandbox_danger_full_access]
# danger-full-access options here...
```
In this scheme, you never need to comment out the configuration for an
individual sandbox type: you only need to redefine `sandbox_mode`.
Relatedly, previous to this change, a user had to do `-c
sandbox.mode=read-only` to change the mode on the command line. With
this change, things are arguably a bit cleaner because the equivalent
option is `-c sandbox_mode=read-only` (and now `-c
sandbox_workspace_write=...` can be set separately).
Though more importantly, we introduce the `-s/--sandbox` option to the
CLI, which maps directly to `sandbox_mode` in `config.toml`, making
config override behavior easier to reason about. Moreover, as you can
see in the updates to the various Markdown files, it is much easier to
explain how to configure sandboxing when things like `--sandbox
read-only` can be used as an example.
Relatedly, this cleanup also made it straightforward to add support for
a `sandbox` option for Codex when used as an MCP server (see the changes
to `mcp-server/src/codex_tool_config.rs`).
Fixes https://github.com/openai/codex/issues/1248.
2025-07-07 22:31:30 -07:00
|
|
|
.get(key)
|
2025-05-13 16:52:52 -07:00
|
|
|
.ok_or_else(|| {
|
|
|
|
|
std::io::Error::new(
|
|
|
|
|
std::io::ErrorKind::NotFound,
|
|
|
|
|
format!("config profile `{key}` not found"),
|
|
|
|
|
)
|
|
|
|
|
})?
|
|
|
|
|
.clone(),
|
|
|
|
|
None => ConfigProfile::default(),
|
|
|
|
|
};
|
|
|
|
|
|
feat: add support for --sandbox flag (#1476)
On a high-level, we try to design `config.toml` so that you don't have
to "comment out a lot of stuff" when testing different options.
Previously, defining a sandbox policy was somewhat at odds with this
principle because you would define the policy as attributes of
`[sandbox]` like so:
```toml
[sandbox]
mode = "workspace-write"
writable_roots = [ "/tmp" ]
```
but if you wanted to temporarily change to a read-only sandbox, you
might feel compelled to modify your file to be:
```toml
[sandbox]
mode = "read-only"
# mode = "workspace-write"
# writable_roots = [ "/tmp" ]
```
Technically, commenting out `writable_roots` would not be strictly
necessary, as `mode = "read-only"` would ignore `writable_roots`, but
it's still a reasonable thing to do to keep things tidy.
Currently, the various values for `mode` do not support that many
attributes, so this is not that hard to maintain, but one could imagine
this becoming more complex in the future.
In this PR, we change Codex CLI so that it no longer recognizes
`[sandbox]`. Instead, it introduces a top-level option, `sandbox_mode`,
and `[sandbox_workspace_write]` is used to further configure the sandbox
when `sandbox_mode = "workspace-write"` is used:
```toml
sandbox_mode = "workspace-write"
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
```
This feels a bit more future-proof in that it is less tedious to
configure different sandboxes:
```toml
sandbox_mode = "workspace-write"
[sandbox_read_only]
# read-only options here...
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
[sandbox_danger_full_access]
# danger-full-access options here...
```
In this scheme, you never need to comment out the configuration for an
individual sandbox type: you only need to redefine `sandbox_mode`.
Relatedly, previous to this change, a user had to do `-c
sandbox.mode=read-only` to change the mode on the command line. With
this change, things are arguably a bit cleaner because the equivalent
option is `-c sandbox_mode=read-only` (and now `-c
sandbox_workspace_write=...` can be set separately).
Though more importantly, we introduce the `-s/--sandbox` option to the
CLI, which maps directly to `sandbox_mode` in `config.toml`, making
config override behavior easier to reason about. Moreover, as you can
see in the updates to the various Markdown files, it is much easier to
explain how to configure sandboxing when things like `--sandbox
read-only` can be used as an example.
Relatedly, this cleanup also made it straightforward to add support for
a `sandbox` option for Codex when used as an MCP server (see the changes
to `mcp-server/src/codex_tool_config.rs`).
Fixes https://github.com/openai/codex/issues/1248.
2025-07-07 22:31:30 -07:00
|
|
|
let sandbox_policy = cfg.derive_sandbox_policy(sandbox_mode);
|
2025-04-27 21:47:50 -07:00
|
|
|
|
2025-05-07 17:38:28 -07:00
|
|
|
let mut model_providers = built_in_model_providers();
|
|
|
|
|
// Merge user-defined providers into the built-in list.
|
|
|
|
|
for (key, provider) in cfg.model_providers.into_iter() {
|
|
|
|
|
model_providers.entry(key).or_insert(provider);
|
|
|
|
|
}
|
|
|
|
|
|
2025-05-13 16:52:52 -07:00
|
|
|
let model_provider_id = model_provider
|
|
|
|
|
.or(config_profile.model_provider)
|
2025-05-07 17:38:28 -07:00
|
|
|
.or(cfg.model_provider)
|
|
|
|
|
.unwrap_or_else(|| "openai".to_string());
|
|
|
|
|
let model_provider = model_providers
|
2025-05-08 21:46:06 -07:00
|
|
|
.get(&model_provider_id)
|
2025-05-07 17:38:28 -07:00
|
|
|
.ok_or_else(|| {
|
|
|
|
|
std::io::Error::new(
|
|
|
|
|
std::io::ErrorKind::NotFound,
|
2025-05-08 21:46:06 -07:00
|
|
|
format!("Model provider `{model_provider_id}` not found"),
|
2025-05-07 17:38:28 -07:00
|
|
|
)
|
|
|
|
|
})?
|
|
|
|
|
.clone();
|
|
|
|
|
|
2025-08-28 19:24:38 -07:00
|
|
|
let shell_environment_policy = cfg.shell_environment_policy.into();
|
feat: introduce support for shell_environment_policy in config.toml (#1061)
To date, when handling `shell` and `local_shell` tool calls, we were
spawning new processes using the environment inherited from the Codex
process itself. This means that the sensitive `OPENAI_API_KEY` that
Codex needs to talk to OpenAI models was made available to everything
run by `shell` and `local_shell`. While there are cases where that might
be useful, it does not seem like a good default.
This PR introduces a complex `shell_environment_policy` config option to
control the `env` used with these tool calls. It is inevitably a bit
complex so that it is possible to override individual components of the
policy so without having to restate the entire thing.
Details are in the updated `README.md` in this PR, but here is the
relevant bit that explains the individual fields of
`shell_environment_policy`:
| Field | Type | Default | Description |
| ------------------------- | -------------------------- | ------- |
-----------------------------------------------------------------------------------------------------------------------------------------------
|
| `inherit` | string | `core` | Starting template for the
environment:<br>`core` (`HOME`, `PATH`, `USER`, …), `all` (clone full
parent env), or `none` (start empty). |
| `ignore_default_excludes` | boolean | `false` | When `false`, Codex
removes any var whose **name** contains `KEY`, `SECRET`, or `TOKEN`
(case-insensitive) before other rules run. |
| `exclude` | array<string> | `[]` | Case-insensitive glob
patterns to drop after the default filter.<br>Examples: `"AWS_*"`,
`"AZURE_*"`. |
| `set` | table<string,string> | `{}` | Explicit key/value
overrides or additions – always win over inherited values. |
| `include_only` | array<string> | `[]` | If non-empty, a
whitelist of patterns; only variables that match _one_ pattern survive
the final step. (Generally used with `inherit = "all"`.) |
In particular, note that the default is `inherit = "core"`, so:
* if you have extra env variables that you want to inherit from the
parent process, use `inherit = "all"` and then specify `include_only`
* if you have extra env variables where you want to hardcode the values,
the default `inherit = "core"` will work fine, but then you need to
specify `set`
This configuration is not battle-tested, so we will probably still have
to play with it a bit. `core/src/exec_env.rs` has the critical business
logic as well as unit tests.
Though if nothing else, previous to this change:
```
$ cargo run --bin codex -- debug seatbelt -- printenv OPENAI_API_KEY
# ...prints OPENAI_API_KEY...
```
But after this change it does not print anything (as desired).
One final thing to call out about this PR is that the
`configure_command!` macro we use in `core/src/exec.rs` has to do some
complex logic with respect to how it builds up the `env` for the process
being spawned under Landlock/seccomp. Specifically, doing
`cmd.env_clear()` followed by `cmd.envs(&$env_map)` (which is arguably
the most intuitive way to do it) caused the Landlock unit tests to fail
because the processes spawned by the unit tests started failing in
unexpected ways! If we forgo `env_clear()` in favor of updating env vars
one at a time, the tests still pass. The comment in the code talks about
this a bit, and while I would like to investigate this more, I need to
move on for the moment, but I do plan to come back to it to fully
understand what is going on. For example, this suggests that we might
not be able to spawn a C program that calls `env_clear()`, which would
be...weird. We may still have to fiddle with our Landlock config if that
is the case.
2025-05-22 09:51:19 -07:00
|
|
|
|
2025-05-12 08:45:46 -07:00
|
|
|
let resolved_cwd = {
|
|
|
|
|
use std::env;
|
|
|
|
|
|
|
|
|
|
match cwd {
|
|
|
|
|
None => {
|
|
|
|
|
tracing::info!("cwd not set, using current dir");
|
|
|
|
|
env::current_dir()?
|
|
|
|
|
}
|
|
|
|
|
Some(p) if p.is_absolute() => p,
|
|
|
|
|
Some(p) => {
|
|
|
|
|
// Resolve relative path against the current working directory.
|
|
|
|
|
tracing::info!("cwd is relative, resolving against current dir");
|
|
|
|
|
let mut current = env::current_dir()?;
|
|
|
|
|
current.push(p);
|
|
|
|
|
current
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
2025-08-28 19:24:38 -07:00
|
|
|
let history = cfg.history.unwrap_or_default();
|
2025-08-23 22:58:56 -07:00
|
|
|
|
|
|
|
|
let tools_web_search_request = override_tools_web_search_request
|
|
|
|
|
.or(cfg.tools.as_ref().and_then(|t| t.web_search))
|
|
|
|
|
.unwrap_or(false);
|
feat: record messages from user in ~/.codex/history.jsonl (#939)
This is a large change to support a "history" feature like you would
expect in a shell like Bash.
History events are recorded in `$CODEX_HOME/history.jsonl`. Because it
is a JSONL file, it is straightforward to append new entries (as opposed
to the TypeScript file that uses `$CODEX_HOME/history.json`, so to be
valid JSON, each new entry entails rewriting the entire file). Because
it is possible for there to be multiple instances of Codex CLI writing
to `history.jsonl` at once, we use advisory file locking when working
with `history.jsonl` in `codex-rs/core/src/message_history.rs`.
Because we believe history is a sufficiently useful feature, we enable
it by default. Though to provide some safety, we set the file
permissions of `history.jsonl` to be `o600` so that other users on the
system cannot read the user's history. We do not yet support a default
list of `SENSITIVE_PATTERNS` as the TypeScript CLI does:
https://github.com/openai/codex/blob/3fdf9df1335ac9501e3fb0e61715359145711e8b/codex-cli/src/utils/storage/command-history.ts#L10-L17
We are going to take a more conservative approach to this list in the
Rust CLI. For example, while `/\b[A-Za-z0-9-_]{20,}\b/` might exclude
sensitive information like API tokens, it would also exclude valuable
information such as references to Git commits.
As noted in the updated documentation, users can opt-out of history by
adding the following to `config.toml`:
```toml
[history]
persistence = "none"
```
Because `history.jsonl` could, in theory, be quite large, we take a[n
arguably overly pedantic] approach in reading history entries into
memory. Specifically, we start by telling the client the current number
of entries in the history file (`history_entry_count`) as well as the
inode (`history_log_id`) of `history.jsonl` (see the new fields on
`SessionConfiguredEvent`).
The client is responsible for keeping new entries in memory to create a
"local history," but if the user hits up enough times to go "past" the
end of local history, then the client should use the new
`GetHistoryEntryRequest` in the protocol to fetch older entries.
Specifically, it should pass the `history_log_id` it was given
originally and work backwards from `history_entry_count`. (It should
really fetch history in batches rather than one-at-a-time, but that is
something we can improve upon in subsequent PRs.)
The motivation behind this crazy scheme is that it is designed to defend
against:
* The `history.jsonl` being truncated during the session such that the
index into the history is no longer consistent with what had been read
up to that point. We do not yet have logic to enforce a `max_bytes` for
`history.jsonl`, but once we do, we will aspire to implement it in a way
that should result in a new inode for the file on most systems.
* New items from concurrent Codex CLI sessions appending to the history.
Because, in absence of truncation, `history.jsonl` is an append-only
log, so long as the client reads backwards from `history_entry_count`,
it should always get a consistent view of history. (That said, it will
not be able to read _new_ commands from concurrent sessions, but perhaps
we will introduce a `/` command to reload latest history or something
down the road.)
Admittedly, my testing of this feature thus far has been fairly light. I
expect we will find bugs and introduce enhancements/fixes going forward.
2025-05-15 16:26:23 -07:00
|
|
|
|
2025-08-27 17:41:23 -07:00
|
|
|
let include_view_image_tool = include_view_image_tool
|
|
|
|
|
.or(cfg.tools.as_ref().and_then(|t| t.view_image))
|
|
|
|
|
.unwrap_or(true);
|
|
|
|
|
|
feat: show number of tokens remaining in UI (#1388)
When using the OpenAI Responses API, we now record the `usage` field for
a `"response.completed"` event, which includes metrics about the number
of tokens consumed. We also introduce `openai_model_info.rs`, which
includes current data about the most common OpenAI models available via
the API (specifically `context_window` and `max_output_tokens`). If
Codex does not recognize the model, you can set `model_context_window`
and `model_max_output_tokens` explicitly in `config.toml`.
We then introduce a new event type to `protocol.rs`, `TokenCount`,
which includes the `TokenUsage` for the most recent turn.
Finally, we update the TUI to record the running sum of tokens used so
the percentage of available context window remaining can be reported via
the placeholder text for the composer:

We could certainly get much fancier with this (such as reporting the
estimated cost of the conversation), but for now, we are just trying to
achieve feature parity with the TypeScript CLI.
Though arguably this improves upon the TypeScript CLI, as the TypeScript
CLI uses heuristics to estimate the number of tokens used rather than
using the `usage` information directly:
https://github.com/openai/codex/blob/296996d74e345b1b05d8c3451a06ace21c5ada96/codex-cli/src/utils/approximate-tokens-used.ts#L3-L16
Fixes https://github.com/openai/codex/issues/1242
2025-06-25 23:31:11 -07:00
|
|
|
let model = model
|
|
|
|
|
.or(config_profile.model)
|
|
|
|
|
.or(cfg.model)
|
|
|
|
|
.unwrap_or_else(default_model);
|
2025-09-05 16:56:58 -07:00
|
|
|
|
2025-09-14 15:45:15 -07:00
|
|
|
let mut model_family =
|
|
|
|
|
find_family_for_model(&model).unwrap_or_else(|| derive_default_model_family(&model));
|
2025-08-04 23:50:03 -07:00
|
|
|
|
2025-09-05 16:56:58 -07:00
|
|
|
if let Some(supports_reasoning_summaries) = cfg.model_supports_reasoning_summaries {
|
|
|
|
|
model_family.supports_reasoning_summaries = supports_reasoning_summaries;
|
|
|
|
|
}
|
|
|
|
|
if let Some(model_reasoning_summary_format) = cfg.model_reasoning_summary_format {
|
|
|
|
|
model_family.reasoning_summary_format = model_reasoning_summary_format;
|
|
|
|
|
}
|
|
|
|
|
|
2025-08-04 23:50:03 -07:00
|
|
|
let openai_model_info = get_model_info(&model_family);
|
feat: show number of tokens remaining in UI (#1388)
When using the OpenAI Responses API, we now record the `usage` field for
a `"response.completed"` event, which includes metrics about the number
of tokens consumed. We also introduce `openai_model_info.rs`, which
includes current data about the most common OpenAI models available via
the API (specifically `context_window` and `max_output_tokens`). If
Codex does not recognize the model, you can set `model_context_window`
and `model_max_output_tokens` explicitly in `config.toml`.
We then introduce a new event type to `protocol.rs`, `TokenCount`,
which includes the `TokenUsage` for the most recent turn.
Finally, we update the TUI to record the running sum of tokens used so
the percentage of available context window remaining can be reported via
the placeholder text for the composer:

We could certainly get much fancier with this (such as reporting the
estimated cost of the conversation), but for now, we are just trying to
achieve feature parity with the TypeScript CLI.
Though arguably this improves upon the TypeScript CLI, as the TypeScript
CLI uses heuristics to estimate the number of tokens used rather than
using the `usage` information directly:
https://github.com/openai/codex/blob/296996d74e345b1b05d8c3451a06ace21c5ada96/codex-cli/src/utils/approximate-tokens-used.ts#L3-L16
Fixes https://github.com/openai/codex/issues/1242
2025-06-25 23:31:11 -07:00
|
|
|
let model_context_window = cfg
|
|
|
|
|
.model_context_window
|
|
|
|
|
.or_else(|| openai_model_info.as_ref().map(|info| info.context_window));
|
|
|
|
|
let model_max_output_tokens = cfg.model_max_output_tokens.or_else(|| {
|
|
|
|
|
openai_model_info
|
|
|
|
|
.as_ref()
|
|
|
|
|
.map(|info| info.max_output_tokens)
|
|
|
|
|
});
|
2025-09-12 13:07:10 -07:00
|
|
|
let model_auto_compact_token_limit = cfg.model_auto_compact_token_limit.or_else(|| {
|
|
|
|
|
openai_model_info
|
|
|
|
|
.as_ref()
|
|
|
|
|
.and_then(|info| info.auto_compact_token_limit)
|
|
|
|
|
});
|
2025-07-18 17:04:04 -07:00
|
|
|
|
2025-07-29 10:06:05 -07:00
|
|
|
// Load base instructions override from a file if specified. If the
|
|
|
|
|
// path is relative, resolve it against the effective cwd so the
|
|
|
|
|
// behaviour matches other path-like config values.
|
2025-08-04 09:34:46 -07:00
|
|
|
let experimental_instructions_path = config_profile
|
|
|
|
|
.experimental_instructions_file
|
|
|
|
|
.as_ref()
|
|
|
|
|
.or(cfg.experimental_instructions_file.as_ref());
|
|
|
|
|
let file_base_instructions =
|
|
|
|
|
Self::get_base_instructions(experimental_instructions_path, &resolved_cwd)?;
|
2025-07-29 10:06:05 -07:00
|
|
|
let base_instructions = base_instructions.or(file_base_instructions);
|
2025-07-22 09:42:22 -07:00
|
|
|
|
Review Mode (Core) (#3401)
## 📝 Review Mode -- Core
This PR introduces the Core implementation for Review mode:
- New op `Op::Review { prompt: String }:` spawns a child review task
with isolated context, a review‑specific system prompt, and a
`Config.review_model`.
- `EnteredReviewMode`: emitted when the child review session starts.
Every event from this point onwards reflects the review session.
- `ExitedReviewMode(Option<ReviewOutputEvent>)`: emitted when the review
finishes or is interrupted, with optional structured findings:
```json
{
"findings": [
{
"title": "<≤ 80 chars, imperative>",
"body": "<valid Markdown explaining *why* this is a problem; cite files/lines/functions>",
"confidence_score": <float 0.0-1.0>,
"priority": <int 0-3>,
"code_location": {
"absolute_file_path": "<file path>",
"line_range": {"start": <int>, "end": <int>}
}
}
],
"overall_correctness": "patch is correct" | "patch is incorrect",
"overall_explanation": "<1-3 sentence explanation justifying the overall_correctness verdict>",
"overall_confidence_score": <float 0.0-1.0>
}
```
## Questions
### Why separate out its own message history?
We want the review thread to match the training of our review models as
much as possible -- that means using a custom prompt, removing user
instructions, and starting a clean chat history.
We also want to make sure the review thread doesn't leak into the parent
thread.
### Why do this as a mode, vs. sub-agents?
1. We want review to be a synchronous task, so it's fine for now to do a
bespoke implementation.
2. We're still unclear about the final structure for sub-agents. We'd
prefer to land this quickly and then refactor into sub-agents without
rushing that implementation.
2025-09-12 16:25:10 -07:00
|
|
|
// Default review model when not set in config; allow CLI override to take precedence.
|
|
|
|
|
let review_model = override_review_model
|
|
|
|
|
.or(cfg.review_model)
|
|
|
|
|
.unwrap_or_else(default_review_model);
|
|
|
|
|
|
2025-05-07 17:38:28 -07:00
|
|
|
let config = Self {
|
feat: show number of tokens remaining in UI (#1388)
When using the OpenAI Responses API, we now record the `usage` field for
a `"response.completed"` event, which includes metrics about the number
of tokens consumed. We also introduce `openai_model_info.rs`, which
includes current data about the most common OpenAI models available via
the API (specifically `context_window` and `max_output_tokens`). If
Codex does not recognize the model, you can set `model_context_window`
and `model_max_output_tokens` explicitly in `config.toml`.
We then introduce a new event type to `protocol.rs`, `TokenCount`,
which includes the `TokenUsage` for the most recent turn.
Finally, we update the TUI to record the running sum of tokens used so
the percentage of available context window remaining can be reported via
the placeholder text for the composer:

We could certainly get much fancier with this (such as reporting the
estimated cost of the conversation), but for now, we are just trying to
achieve feature parity with the TypeScript CLI.
Though arguably this improves upon the TypeScript CLI, as the TypeScript
CLI uses heuristics to estimate the number of tokens used rather than
using the `usage` information directly:
https://github.com/openai/codex/blob/296996d74e345b1b05d8c3451a06ace21c5ada96/codex-cli/src/utils/approximate-tokens-used.ts#L3-L16
Fixes https://github.com/openai/codex/issues/1242
2025-06-25 23:31:11 -07:00
|
|
|
model,
|
Review Mode (Core) (#3401)
## 📝 Review Mode -- Core
This PR introduces the Core implementation for Review mode:
- New op `Op::Review { prompt: String }:` spawns a child review task
with isolated context, a review‑specific system prompt, and a
`Config.review_model`.
- `EnteredReviewMode`: emitted when the child review session starts.
Every event from this point onwards reflects the review session.
- `ExitedReviewMode(Option<ReviewOutputEvent>)`: emitted when the review
finishes or is interrupted, with optional structured findings:
```json
{
"findings": [
{
"title": "<≤ 80 chars, imperative>",
"body": "<valid Markdown explaining *why* this is a problem; cite files/lines/functions>",
"confidence_score": <float 0.0-1.0>,
"priority": <int 0-3>,
"code_location": {
"absolute_file_path": "<file path>",
"line_range": {"start": <int>, "end": <int>}
}
}
],
"overall_correctness": "patch is correct" | "patch is incorrect",
"overall_explanation": "<1-3 sentence explanation justifying the overall_correctness verdict>",
"overall_confidence_score": <float 0.0-1.0>
}
```
## Questions
### Why separate out its own message history?
We want the review thread to match the training of our review models as
much as possible -- that means using a custom prompt, removing user
instructions, and starting a clean chat history.
We also want to make sure the review thread doesn't leak into the parent
thread.
### Why do this as a mode, vs. sub-agents?
1. We want review to be a synchronous task, so it's fine for now to do a
bespoke implementation.
2. We're still unclear about the final structure for sub-agents. We'd
prefer to land this quickly and then refactor into sub-agents without
rushing that implementation.
2025-09-12 16:25:10 -07:00
|
|
|
review_model,
|
2025-08-04 23:50:03 -07:00
|
|
|
model_family,
|
feat: show number of tokens remaining in UI (#1388)
When using the OpenAI Responses API, we now record the `usage` field for
a `"response.completed"` event, which includes metrics about the number
of tokens consumed. We also introduce `openai_model_info.rs`, which
includes current data about the most common OpenAI models available via
the API (specifically `context_window` and `max_output_tokens`). If
Codex does not recognize the model, you can set `model_context_window`
and `model_max_output_tokens` explicitly in `config.toml`.
We then introduce a new event type to `protocol.rs`, `TokenCount`,
which includes the `TokenUsage` for the most recent turn.
Finally, we update the TUI to record the running sum of tokens used so
the percentage of available context window remaining can be reported via
the placeholder text for the composer:

We could certainly get much fancier with this (such as reporting the
estimated cost of the conversation), but for now, we are just trying to
achieve feature parity with the TypeScript CLI.
Though arguably this improves upon the TypeScript CLI, as the TypeScript
CLI uses heuristics to estimate the number of tokens used rather than
using the `usage` information directly:
https://github.com/openai/codex/blob/296996d74e345b1b05d8c3451a06ace21c5ada96/codex-cli/src/utils/approximate-tokens-used.ts#L3-L16
Fixes https://github.com/openai/codex/issues/1242
2025-06-25 23:31:11 -07:00
|
|
|
model_context_window,
|
|
|
|
|
model_max_output_tokens,
|
2025-09-12 13:07:10 -07:00
|
|
|
model_auto_compact_token_limit,
|
2025-05-08 21:46:06 -07:00
|
|
|
model_provider_id,
|
2025-05-07 17:38:28 -07:00
|
|
|
model_provider,
|
2025-05-12 08:45:46 -07:00
|
|
|
cwd: resolved_cwd,
|
fix: overhaul SandboxPolicy and config loading in Rust (#732)
Previous to this PR, `SandboxPolicy` was a bit difficult to work with:
https://github.com/openai/codex/blob/237f8a11e11fdcc793a09e787e48215676d9b95b/codex-rs/core/src/protocol.rs#L98-L108
Specifically:
* It was an `enum` and therefore options were mutually exclusive as
opposed to additive.
* It defined things in terms of what the agent _could not_ do as opposed
to what they _could_ do. This made things hard to support because we
would prefer to build up a sandbox config by starting with something
extremely restrictive and only granting permissions for things the user
has explicitly allowed.
This PR changes things substantially by redefining the policy in terms
of two concepts:
* A `SandboxPermission` enum that defines permissions that can be
granted to the agent/sandbox.
* A `SandboxPolicy` that internally stores a `Vec<SandboxPermission>`,
but externally exposes a simpler API that can be used to configure
Seatbelt/Landlock.
Previous to this PR, we supported a `--sandbox` flag that effectively
mapped to an enum value in `SandboxPolicy`. Though now that
`SandboxPolicy` is a wrapper around `Vec<SandboxPermission>`, the single
`--sandbox` flag no longer makes sense. While I could have turned it
into a flag that the user can specify multiple times, I think the
current values to use with such a flag are long and potentially messy,
so for the moment, I have dropped support for `--sandbox` altogether and
we can bring it back once we have figured out the naming thing.
Since `--sandbox` is gone, users now have to specify `--full-auto` to
get a sandbox that allows writes in `cwd`. Admittedly, there is no clean
way to specify the equivalent of `--full-auto` in your `config.toml`
right now, so we will have to revisit that, as well.
Because `Config` presents a `SandboxPolicy` field and `SandboxPolicy`
changed considerably, I had to overhaul how config loading works, as
well. There are now two distinct concepts, `ConfigToml` and `Config`:
* `ConfigToml` is the deserialization of `~/.codex/config.toml`. As one
might expect, every field is `Optional` and it is `#[derive(Deserialize,
Default)]`. Consistent use of `Optional` makes it clear what the user
has specified explicitly.
* `Config` is the "normalized config" and is produced by merging
`ConfigToml` with `ConfigOverrides`. Where `ConfigToml` contains a raw
`Option<Vec<SandboxPermission>>`, `Config` presents only the final
`SandboxPolicy`.
The changes to `core/src/exec.rs` and `core/src/linux.rs` merit extra
special attention to ensure we are faithfully mapping the
`SandboxPolicy` to the Seatbelt and Landlock configs, respectively.
Also, take note that `core/src/seatbelt_readonly_policy.sbpl` has been
renamed to `codex-rs/core/src/seatbelt_base_policy.sbpl` and that
`(allow file-read*)` has been removed from the `.sbpl` file as now this
is added to the policy in `core/src/exec.rs` when
`sandbox_policy.has_full_disk_read_access()` is `true`.
2025-04-29 15:01:16 -07:00
|
|
|
approval_policy: approval_policy
|
2025-05-13 16:52:52 -07:00
|
|
|
.or(config_profile.approval_policy)
|
fix: overhaul SandboxPolicy and config loading in Rust (#732)
Previous to this PR, `SandboxPolicy` was a bit difficult to work with:
https://github.com/openai/codex/blob/237f8a11e11fdcc793a09e787e48215676d9b95b/codex-rs/core/src/protocol.rs#L98-L108
Specifically:
* It was an `enum` and therefore options were mutually exclusive as
opposed to additive.
* It defined things in terms of what the agent _could not_ do as opposed
to what they _could_ do. This made things hard to support because we
would prefer to build up a sandbox config by starting with something
extremely restrictive and only granting permissions for things the user
has explicitly allowed.
This PR changes things substantially by redefining the policy in terms
of two concepts:
* A `SandboxPermission` enum that defines permissions that can be
granted to the agent/sandbox.
* A `SandboxPolicy` that internally stores a `Vec<SandboxPermission>`,
but externally exposes a simpler API that can be used to configure
Seatbelt/Landlock.
Previous to this PR, we supported a `--sandbox` flag that effectively
mapped to an enum value in `SandboxPolicy`. Though now that
`SandboxPolicy` is a wrapper around `Vec<SandboxPermission>`, the single
`--sandbox` flag no longer makes sense. While I could have turned it
into a flag that the user can specify multiple times, I think the
current values to use with such a flag are long and potentially messy,
so for the moment, I have dropped support for `--sandbox` altogether and
we can bring it back once we have figured out the naming thing.
Since `--sandbox` is gone, users now have to specify `--full-auto` to
get a sandbox that allows writes in `cwd`. Admittedly, there is no clean
way to specify the equivalent of `--full-auto` in your `config.toml`
right now, so we will have to revisit that, as well.
Because `Config` presents a `SandboxPolicy` field and `SandboxPolicy`
changed considerably, I had to overhaul how config loading works, as
well. There are now two distinct concepts, `ConfigToml` and `Config`:
* `ConfigToml` is the deserialization of `~/.codex/config.toml`. As one
might expect, every field is `Optional` and it is `#[derive(Deserialize,
Default)]`. Consistent use of `Optional` makes it clear what the user
has specified explicitly.
* `Config` is the "normalized config" and is produced by merging
`ConfigToml` with `ConfigOverrides`. Where `ConfigToml` contains a raw
`Option<Vec<SandboxPermission>>`, `Config` presents only the final
`SandboxPolicy`.
The changes to `core/src/exec.rs` and `core/src/linux.rs` merit extra
special attention to ensure we are faithfully mapping the
`SandboxPolicy` to the Seatbelt and Landlock configs, respectively.
Also, take note that `core/src/seatbelt_readonly_policy.sbpl` has been
renamed to `codex-rs/core/src/seatbelt_base_policy.sbpl` and that
`(allow file-read*)` has been removed from the `.sbpl` file as now this
is added to the policy in `core/src/exec.rs` when
`sandbox_policy.has_full_disk_read_access()` is `true`.
2025-04-29 15:01:16 -07:00
|
|
|
.or(cfg.approval_policy)
|
|
|
|
|
.unwrap_or_else(AskForApproval::default),
|
|
|
|
|
sandbox_policy,
|
feat: introduce support for shell_environment_policy in config.toml (#1061)
To date, when handling `shell` and `local_shell` tool calls, we were
spawning new processes using the environment inherited from the Codex
process itself. This means that the sensitive `OPENAI_API_KEY` that
Codex needs to talk to OpenAI models was made available to everything
run by `shell` and `local_shell`. While there are cases where that might
be useful, it does not seem like a good default.
This PR introduces a complex `shell_environment_policy` config option to
control the `env` used with these tool calls. It is inevitably a bit
complex so that it is possible to override individual components of the
policy so without having to restate the entire thing.
Details are in the updated `README.md` in this PR, but here is the
relevant bit that explains the individual fields of
`shell_environment_policy`:
| Field | Type | Default | Description |
| ------------------------- | -------------------------- | ------- |
-----------------------------------------------------------------------------------------------------------------------------------------------
|
| `inherit` | string | `core` | Starting template for the
environment:<br>`core` (`HOME`, `PATH`, `USER`, …), `all` (clone full
parent env), or `none` (start empty). |
| `ignore_default_excludes` | boolean | `false` | When `false`, Codex
removes any var whose **name** contains `KEY`, `SECRET`, or `TOKEN`
(case-insensitive) before other rules run. |
| `exclude` | array<string> | `[]` | Case-insensitive glob
patterns to drop after the default filter.<br>Examples: `"AWS_*"`,
`"AZURE_*"`. |
| `set` | table<string,string> | `{}` | Explicit key/value
overrides or additions – always win over inherited values. |
| `include_only` | array<string> | `[]` | If non-empty, a
whitelist of patterns; only variables that match _one_ pattern survive
the final step. (Generally used with `inherit = "all"`.) |
In particular, note that the default is `inherit = "core"`, so:
* if you have extra env variables that you want to inherit from the
parent process, use `inherit = "all"` and then specify `include_only`
* if you have extra env variables where you want to hardcode the values,
the default `inherit = "core"` will work fine, but then you need to
specify `set`
This configuration is not battle-tested, so we will probably still have
to play with it a bit. `core/src/exec_env.rs` has the critical business
logic as well as unit tests.
Though if nothing else, previous to this change:
```
$ cargo run --bin codex -- debug seatbelt -- printenv OPENAI_API_KEY
# ...prints OPENAI_API_KEY...
```
But after this change it does not print anything (as desired).
One final thing to call out about this PR is that the
`configure_command!` macro we use in `core/src/exec.rs` has to do some
complex logic with respect to how it builds up the `env` for the process
being spawned under Landlock/seccomp. Specifically, doing
`cmd.env_clear()` followed by `cmd.envs(&$env_map)` (which is arguably
the most intuitive way to do it) caused the Landlock unit tests to fail
because the processes spawned by the unit tests started failing in
unexpected ways! If we forgo `env_clear()` in favor of updating env vars
one at a time, the tests still pass. The comment in the code talks about
this a bit, and while I would like to investigate this more, I need to
move on for the moment, but I do plan to come back to it to fully
understand what is going on. For example, this suggests that we might
not be able to spawn a C program that calls `env_clear()`, which would
be...weird. We may still have to fiddle with our Landlock config if that
is the case.
2025-05-22 09:51:19 -07:00
|
|
|
shell_environment_policy,
|
feat: configurable notifications in the Rust CLI (#793)
With this change, you can specify a program that will be executed to get
notified about events generated by Codex. The notification info will be
packaged as a JSON object. The supported notification types are defined
by the `UserNotification` enum introduced in this PR. Initially, it
contains only one variant, `AgentTurnComplete`:
```rust
pub(crate) enum UserNotification {
#[serde(rename_all = "kebab-case")]
AgentTurnComplete {
turn_id: String,
/// Messages that the user sent to the agent to initiate the turn.
input_messages: Vec<String>,
/// The last message sent by the assistant in the turn.
last_assistant_message: Option<String>,
},
}
```
This is intended to support the common case when a "turn" ends, which
often means it is now your chance to give Codex further instructions.
For example, I have the following in my `~/.codex/config.toml`:
```toml
notify = ["python3", "/Users/mbolin/.codex/notify.py"]
```
I created my own custom notifier script that calls out to
[terminal-notifier](https://github.com/julienXX/terminal-notifier) to
show a desktop push notification on macOS. Contents of `notify.py`:
```python
#!/usr/bin/env python3
import json
import subprocess
import sys
def main() -> int:
if len(sys.argv) != 2:
print("Usage: notify.py <NOTIFICATION_JSON>")
return 1
try:
notification = json.loads(sys.argv[1])
except json.JSONDecodeError:
return 1
match notification_type := notification.get("type"):
case "agent-turn-complete":
assistant_message = notification.get("last-assistant-message")
if assistant_message:
title = f"Codex: {assistant_message}"
else:
title = "Codex: Turn Complete!"
input_messages = notification.get("input_messages", [])
message = " ".join(input_messages)
title += message
case _:
print(f"not sending a push notification for: {notification_type}")
return 0
subprocess.check_output(
[
"terminal-notifier",
"-title",
title,
"-message",
message,
"-group",
"codex",
"-ignoreDnD",
"-activate",
"com.googlecode.iterm2",
]
)
return 0
if __name__ == "__main__":
sys.exit(main())
```
For reference, here are related PRs that tried to add this functionality
to the TypeScript version of the Codex CLI:
* https://github.com/openai/codex/pull/160
* https://github.com/openai/codex/pull/498
2025-05-02 19:48:13 -07:00
|
|
|
notify: cfg.notify,
|
2025-07-22 09:42:22 -07:00
|
|
|
user_instructions,
|
|
|
|
|
base_instructions,
|
feat: support mcp_servers in config.toml (#829)
This adds initial support for MCP servers in the style of Claude Desktop
and Cursor. Note this PR is the bare minimum to get things working end
to end: all configured MCP servers are launched every time Codex is run,
there is no recovery for MCP servers that crash, etc.
(Also, I took some shortcuts to change some fields of `Session` to be
`pub(crate)`, which also means there are circular deps between
`codex.rs` and `mcp_tool_call.rs`, but I will clean that up in a
subsequent PR.)
`codex-rs/README.md` is updated as part of this PR to explain how to use
this feature. There is a bit of plumbing to route the new settings from
`Config` to the business logic in `codex.rs`. The most significant
chunks for new code are in `mcp_connection_manager.rs` (which defines
the `McpConnectionManager` struct) and `mcp_tool_call.rs`, which is
responsible for tool calls.
This PR also introduces new `McpToolCallBegin` and `McpToolCallEnd`
event types to the protocol, but does not add any handlers for them.
(See https://github.com/openai/codex/pull/836 for initial usage.)
To test, I added the following to my `~/.codex/config.toml`:
```toml
# Local build of https://github.com/hideya/mcp-server-weather-js
[mcp_servers.weather]
command = "/Users/mbolin/code/mcp-server-weather-js/dist/index.js"
args = []
```
And then I ran the following:
```
codex-rs$ cargo run --bin codex exec 'what is the weather in san francisco'
[2025-05-06T22:40:05] Task started: 1
[2025-05-06T22:40:18] Agent message: Here’s the latest National Weather Service forecast for San Francisco (downtown, near 37.77° N, 122.42° W):
This Afternoon (Tue):
• Sunny, high near 69 °F
• West-southwest wind around 12 mph
Tonight:
• Partly cloudy, low around 52 °F
• SW wind 7–10 mph
...
```
Note that Codex itself is not able to make network calls, so it would
not normally be able to get live weather information like this. However,
the weather MCP is [currently] not run under the Codex sandbox, so it is
able to hit `api.weather.gov` and fetch current weather information.
---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed
with [ReviewStack](https://reviewstack.dev/openai/codex/pull/829).
* #836
* __->__ #829
2025-05-06 15:47:59 -07:00
|
|
|
mcp_servers: cfg.mcp_servers,
|
2025-05-07 17:38:28 -07:00
|
|
|
model_providers,
|
2025-05-10 17:52:59 -07:00
|
|
|
project_doc_max_bytes: cfg.project_doc_max_bytes.unwrap_or(PROJECT_DOC_MAX_BYTES),
|
2025-10-01 11:19:59 -07:00
|
|
|
project_doc_fallback_filenames: cfg
|
|
|
|
|
.project_doc_fallback_filenames
|
|
|
|
|
.unwrap_or_default()
|
|
|
|
|
.into_iter()
|
|
|
|
|
.filter_map(|name| {
|
|
|
|
|
let trimmed = name.trim();
|
|
|
|
|
if trimmed.is_empty() {
|
|
|
|
|
None
|
|
|
|
|
} else {
|
|
|
|
|
Some(trimmed.to_string())
|
|
|
|
|
}
|
|
|
|
|
})
|
|
|
|
|
.collect(),
|
2025-05-15 00:30:13 -07:00
|
|
|
codex_home,
|
feat: record messages from user in ~/.codex/history.jsonl (#939)
This is a large change to support a "history" feature like you would
expect in a shell like Bash.
History events are recorded in `$CODEX_HOME/history.jsonl`. Because it
is a JSONL file, it is straightforward to append new entries (as opposed
to the TypeScript file that uses `$CODEX_HOME/history.json`, so to be
valid JSON, each new entry entails rewriting the entire file). Because
it is possible for there to be multiple instances of Codex CLI writing
to `history.jsonl` at once, we use advisory file locking when working
with `history.jsonl` in `codex-rs/core/src/message_history.rs`.
Because we believe history is a sufficiently useful feature, we enable
it by default. Though to provide some safety, we set the file
permissions of `history.jsonl` to be `o600` so that other users on the
system cannot read the user's history. We do not yet support a default
list of `SENSITIVE_PATTERNS` as the TypeScript CLI does:
https://github.com/openai/codex/blob/3fdf9df1335ac9501e3fb0e61715359145711e8b/codex-cli/src/utils/storage/command-history.ts#L10-L17
We are going to take a more conservative approach to this list in the
Rust CLI. For example, while `/\b[A-Za-z0-9-_]{20,}\b/` might exclude
sensitive information like API tokens, it would also exclude valuable
information such as references to Git commits.
As noted in the updated documentation, users can opt-out of history by
adding the following to `config.toml`:
```toml
[history]
persistence = "none"
```
Because `history.jsonl` could, in theory, be quite large, we take a[n
arguably overly pedantic] approach in reading history entries into
memory. Specifically, we start by telling the client the current number
of entries in the history file (`history_entry_count`) as well as the
inode (`history_log_id`) of `history.jsonl` (see the new fields on
`SessionConfiguredEvent`).
The client is responsible for keeping new entries in memory to create a
"local history," but if the user hits up enough times to go "past" the
end of local history, then the client should use the new
`GetHistoryEntryRequest` in the protocol to fetch older entries.
Specifically, it should pass the `history_log_id` it was given
originally and work backwards from `history_entry_count`. (It should
really fetch history in batches rather than one-at-a-time, but that is
something we can improve upon in subsequent PRs.)
The motivation behind this crazy scheme is that it is designed to defend
against:
* The `history.jsonl` being truncated during the session such that the
index into the history is no longer consistent with what had been read
up to that point. We do not yet have logic to enforce a `max_bytes` for
`history.jsonl`, but once we do, we will aspire to implement it in a way
that should result in a new inode for the file on most systems.
* New items from concurrent Codex CLI sessions amending to the history.
Because, in absence of truncation, `history.jsonl` is an append-only
log, so long as the client reads backwards from `history_entry_count`,
it should always get a consistent view of history. (That said, it will
not be able to read _new_ commands from concurrent sessions, but perhaps
we will introduce a `/` command to reload latest history or something
down the road.)
Admittedly, my testing of this feature thus far has been fairly light. I
expect we will find bugs and introduce enhancements/fixes going forward.
2025-05-15 16:26:23 -07:00
|
|
|
history,
|
2025-05-16 11:33:08 -07:00
|
|
|
file_opener: cfg.file_opener.unwrap_or(UriBasedFileOpener::VsCode),
|
2025-05-22 21:52:28 -07:00
|
|
|
codex_linux_sandbox_exe,
|
2025-05-30 23:14:56 -07:00
|
|
|
|
2025-08-04 17:03:24 -07:00
|
|
|
hide_agent_reasoning: cfg.hide_agent_reasoning.unwrap_or(false),
|
2025-08-05 11:31:11 -07:00
|
|
|
show_raw_agent_reasoning: cfg
|
|
|
|
|
.show_raw_agent_reasoning
|
2025-08-05 14:42:49 -07:00
|
|
|
.or(show_raw_agent_reasoning)
|
2025-08-05 11:31:11 -07:00
|
|
|
.unwrap_or(false),
|
2025-07-08 22:05:22 +03:00
|
|
|
model_reasoning_effort: config_profile
|
|
|
|
|
.model_reasoning_effort
|
2025-09-12 12:06:33 -07:00
|
|
|
.or(cfg.model_reasoning_effort),
|
2025-07-08 22:05:22 +03:00
|
|
|
model_reasoning_summary: config_profile
|
|
|
|
|
.model_reasoning_summary
|
|
|
|
|
.or(cfg.model_reasoning_summary)
|
|
|
|
|
.unwrap_or_default(),
|
2025-08-22 17:12:10 +01:00
|
|
|
model_verbosity: config_profile.model_verbosity.or(cfg.model_verbosity),
|
2025-07-11 13:30:11 -04:00
|
|
|
chatgpt_base_url: config_profile
|
|
|
|
|
.chatgpt_base_url
|
2025-08-28 19:24:38 -07:00
|
|
|
.or(cfg.chatgpt_base_url)
|
2025-07-11 13:30:11 -04:00
|
|
|
.unwrap_or("https://chatgpt.com/backend-api/".to_string()),
|
2025-07-29 11:22:02 -07:00
|
|
|
include_plan_tool: include_plan_tool.unwrap_or(false),
|
2025-08-22 13:42:34 -07:00
|
|
|
include_apply_patch_tool: include_apply_patch_tool.unwrap_or(false),
|
2025-08-23 22:58:56 -07:00
|
|
|
tools_web_search_request,
|
2025-08-22 18:10:55 -07:00
|
|
|
use_experimental_streamable_shell_tool: cfg
|
|
|
|
|
.experimental_use_exec_command_tool
|
|
|
|
|
.unwrap_or(false),
|
Unified execution (#3288)
## Unified PTY-Based Exec Tool
Note: this requires to have this flag in the config:
`use_experimental_unified_exec_tool=true`
- Adds a PTY-backed interactive exec feature (“unified_exec”) with
session reuse via
session_id, bounded output (128 KiB), and timeout clamping (≤ 60 s).
- Protocol: introduces ResponseItem::UnifiedExec { session_id,
arguments, timeout_ms }.
- Tools: exposes unified_exec as a function tool (Responses API);
excluded from Chat
Completions payload while still supported in tool lists.
- Path handling: resolves commands via PATH (or explicit paths), with
UTF‑8/newline‑aware
truncation (truncate_middle).
- Tests: cover command parsing, path resolution, session
persistence/cleanup, multi‑session
isolation, timeouts, and truncation behavior.
2025-09-10 17:38:11 -07:00
|
|
|
use_experimental_unified_exec_tool: cfg
|
|
|
|
|
.experimental_use_unified_exec_tool
|
2025-09-11 09:19:12 -07:00
|
|
|
.unwrap_or(false),
|
2025-09-26 10:13:37 -07:00
|
|
|
use_experimental_use_rmcp_client: cfg.experimental_use_rmcp_client.unwrap_or(false),
|
2025-08-27 17:41:23 -07:00
|
|
|
include_view_image_tool,
|
2025-09-10 13:53:46 -07:00
|
|
|
active_profile: active_profile_name,
|
2025-10-04 17:41:40 -07:00
|
|
|
windows_wsl_setup_acknowledged: cfg.windows_wsl_setup_acknowledged.unwrap_or(false),
|
2025-08-28 12:54:12 -07:00
|
|
|
disable_paste_burst: cfg.disable_paste_burst.unwrap_or(false),
|
2025-09-15 10:22:02 -07:00
|
|
|
tui_notifications: cfg
|
|
|
|
|
.tui
|
|
|
|
|
.as_ref()
|
|
|
|
|
.map(|t| t.notifications.clone())
|
|
|
|
|
.unwrap_or_default(),
|
OpenTelemetry events (#2103)
### Title
## otel
Codex can emit [OpenTelemetry](https://opentelemetry.io/) **log events**
that
describe each run: outbound API requests, streamed responses, user
input,
tool-approval decisions, and the result of every tool invocation. Export
is
**disabled by default** so local runs remain self-contained. Opt in by
adding an
`[otel]` table and choosing an exporter.
```toml
[otel]
environment = "staging" # defaults to "dev"
exporter = "none" # defaults to "none"; set to otlp-http or otlp-grpc to send events
log_user_prompt = false # defaults to false; redact prompt text unless explicitly enabled
```
Codex tags every exported event with `service.name = "codex-cli"`, the
CLI
version, and an `env` attribute so downstream collectors can distinguish
dev/staging/prod traffic. Only telemetry produced inside the
`codex_otel`
crate—the events listed below—is forwarded to the exporter.
### Event catalog
Every event shares a common set of metadata fields: `event.timestamp`,
`conversation.id`, `app.version`, `auth_mode` (when available),
`user.account_id` (when available), `terminal.type`, `model`, and
`slug`.
With OTEL enabled Codex emits the following event types (in addition to
the
metadata above):
- `codex.api_request`
- `cf_ray` (optional)
- `attempt`
- `duration_ms`
- `http.response.status_code` (optional)
- `error.message` (failures)
- `codex.sse_event`
- `event.kind`
- `duration_ms`
- `error.message` (failures)
- `input_token_count` (completion only)
- `output_token_count` (completion only)
- `cached_token_count` (completion only, optional)
- `reasoning_token_count` (completion only, optional)
- `tool_token_count` (completion only)
- `codex.user_prompt`
- `prompt_length`
- `prompt` (redacted unless `log_user_prompt = true`)
- `codex.tool_decision`
- `tool_name`
- `call_id`
- `decision` (`approved`, `approved_for_session`, `denied`, or `abort`)
- `source` (`config` or `user`)
- `codex.tool_result`
- `tool_name`
- `call_id`
- `arguments`
- `duration_ms` (execution time for the tool)
- `success` (`"true"` or `"false"`)
- `output`
### Choosing an exporter
Set `otel.exporter` to control where events go:
- `none` – leaves instrumentation active but skips exporting. This is
the
default.
- `otlp-http` – posts OTLP log records to an OTLP/HTTP collector.
Specify the
endpoint, protocol, and headers your collector expects:
```toml
[otel]
exporter = { otlp-http = {
endpoint = "https://otel.example.com/v1/logs",
protocol = "binary",
headers = { "x-otlp-api-key" = "${OTLP_TOKEN}" }
}}
```
- `otlp-grpc` – streams OTLP log records over gRPC. Provide the endpoint
and any
metadata headers:
```toml
[otel]
exporter = { otlp-grpc = {
endpoint = "https://otel.example.com:4317",
headers = { "x-otlp-meta" = "abc123" }
}}
```
If the exporter is `none` nothing is written anywhere; otherwise you
must run or point to your
own collector. All exporters run on a background batch worker that is
flushed on
shutdown.
If you build Codex from source the OTEL crate is still behind an `otel`
feature
flag; the official prebuilt binaries ship with the feature enabled. When
the
feature is disabled the telemetry hooks become no-ops so the CLI
continues to
function without the extra dependencies.
---------
Co-authored-by: Anton Panasenko <apanasenko@openai.com>
2025-09-29 19:30:55 +01:00
|
|
|
otel: {
|
|
|
|
|
let t: OtelConfigToml = cfg.otel.unwrap_or_default();
|
|
|
|
|
let log_user_prompt = t.log_user_prompt.unwrap_or(false);
|
|
|
|
|
let environment = t
|
|
|
|
|
.environment
|
|
|
|
|
.unwrap_or(DEFAULT_OTEL_ENVIRONMENT.to_string());
|
|
|
|
|
let exporter = t.exporter.unwrap_or(OtelExporterKind::None);
|
|
|
|
|
OtelConfig {
|
|
|
|
|
log_user_prompt,
|
|
|
|
|
environment,
|
|
|
|
|
exporter,
|
|
|
|
|
}
|
|
|
|
|
},
|
2025-05-07 17:38:28 -07:00
|
|
|
};
|
|
|
|
|
Ok(config)
|
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
As stated in `codex-rs/README.md`:
Today, Codex CLI is written in TypeScript and requires Node.js 22+ to
run it. For a number of users, this runtime requirement inhibits
adoption: they would be better served by a standalone executable. As
maintainers, we want Codex to run efficiently in a wide range of
environments with minimal overhead. We also want to take advantage of
operating system-specific APIs to provide better sandboxing, where
possible.
To that end, we are moving forward with a Rust implementation of Codex
CLI contained in this folder, which has the following benefits:
- The CLI compiles to small, standalone, platform-specific binaries.
- Can make direct, native calls to
[seccomp](https://man7.org/linux/man-pages/man2/seccomp.2.html) and
[landlock](https://man7.org/linux/man-pages/man7/landlock.7.html) in
order to support sandboxing on Linux.
- No runtime garbage collection, resulting in lower memory consumption
and better, more predictable performance.
Currently, the Rust implementation is materially behind the TypeScript
implementation in functionality, so continue to use the TypeScript
implementation for the time being. We will publish native executables via
GitHub Releases as soon as we feel the Rust version is usable.
2025-04-24 13:31:40 -07:00
|
|
|
}
|
|
|
|
|
|
2025-05-13 16:52:52 -07:00
|
|
|
/// Reads user-level instructions from `AGENTS.md` inside the Codex config
/// directory.
///
/// Returns `None` when no directory was provided, when the file is missing
/// or unreadable, or when its contents are empty after trimming whitespace.
fn load_instructions(codex_dir: Option<&Path>) -> Option<String> {
    // Idiomatic early-out: absence of a config dir means no instructions.
    let mut p = codex_dir?.to_path_buf();
    p.push("AGENTS.md");
    // A missing or unreadable file is treated the same as "no instructions";
    // a whitespace-only file is also considered empty.
    std::fs::read_to_string(&p).ok().and_then(|s| {
        let s = s.trim();
        if s.is_empty() {
            None
        } else {
            Some(s.to_string())
        }
    })
}
|
2025-07-22 09:42:22 -07:00
|
|
|
|
2025-07-29 10:06:05 -07:00
|
|
|
/// Loads the contents of an optional "experimental instructions" file.
///
/// Relative paths are resolved against `cwd` so CLI overrides behave the
/// same regardless of the directory the process was launched from.
///
/// # Errors
///
/// Returns an error when the file cannot be read, or when it exists but is
/// empty after trimming — silently ignoring an explicitly configured file
/// would be surprising to the user.
fn get_base_instructions(
    path: Option<&PathBuf>,
    cwd: &Path,
) -> std::io::Result<Option<String>> {
    // No override configured: caller falls back to built-in instructions.
    // (The original `path.as_ref()` produced a redundant `Option<&&PathBuf>`.)
    let Some(p) = path else {
        return Ok(None);
    };

    // Resolve relative paths against the provided cwd to make CLI
    // overrides consistent regardless of where the process was launched
    // from.
    let full_path = if p.is_relative() {
        cwd.join(p)
    } else {
        p.to_path_buf()
    };

    // Attach the offending path to the I/O error for actionable diagnostics.
    let contents = std::fs::read_to_string(&full_path).map_err(|e| {
        std::io::Error::new(
            e.kind(),
            format!(
                "failed to read experimental instructions file {}: {e}",
                full_path.display()
            ),
        )
    })?;

    let s = contents.trim().to_string();
    if s.is_empty() {
        // An explicitly configured but empty file is a config mistake.
        Err(std::io::Error::new(
            std::io::ErrorKind::InvalidData,
            format!(
                "experimental instructions file is empty: {}",
                full_path.display()
            ),
        ))
    } else {
        Ok(Some(s))
    }
}
|
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
As stated in `codex-rs/README.md`:
Today, Codex CLI is written in TypeScript and requires Node.js 22+ to
run it. For a number of users, this runtime requirement inhibits
adoption: they would be better served by a standalone executable. As
maintainers, we want Codex to run efficiently in a wide range of
environments with minimal overhead. We also want to take advantage of
operating system-specific APIs to provide better sandboxing, where
possible.
To that end, we are moving forward with a Rust implementation of Codex
CLI contained in this folder, which has the following benefits:
- The CLI compiles to small, standalone, platform-specific binaries.
- Can make direct, native calls to
[seccomp](https://man7.org/linux/man-pages/man2/seccomp.2.html) and
[landlock](https://man7.org/linux/man-pages/man7/landlock.7.html) in
order to support sandboxing on Linux.
- No runtime garbage collection, resulting in lower memory consumption
and better, more predictable performance.
Currently, the Rust implementation is materially behind the TypeScript
implementation in functionality, so continue to use the TypeScript
implementation for the time being. We will publish native executables via
GitHub Releases as soon as we feel the Rust version is usable.
2025-04-24 13:31:40 -07:00
|
|
|
}
|
fix: write logs to ~/.codex/log instead of /tmp (#669)
Previously, the Rust TUI was writing log files to `/tmp`, which is
world-readable and not available on Windows, so that isn't great.
This PR tries to clean things up by adding a function that provides the
path to the "Codex config dir," e.g., `~/.codex` (though I suppose we
could support `$CODEX_HOME` to override this?) and then defines other
paths in terms of the result of `codex_dir()`.
For example, `log_dir()` returns the folder where log files should be
written which is defined in terms of `codex_dir()`. I updated the TUI to
use this function. On UNIX, we even go so far as to `chmod 600` the log
file by default, though as noted in a comment, it's a bit tedious to do
the equivalent on Windows, so we just let that go for now.
This also changes the default logging level to `info` for `codex_core`
and `codex_tui` when `RUST_LOG` is not specified. I'm not really sure if
we should use a more verbose default (it may be helpful when debugging
user issues), though if so, we should probably also set up log rotation?
2025-04-25 17:37:41 -07:00
|
|
|
|
2025-04-27 21:47:50 -07:00
|
|
|
fn default_model() -> String {
|
|
|
|
|
OPENAI_DEFAULT_MODEL.to_string()
|
|
|
|
|
}
|
|
|
|
|
|
Review Mode (Core) (#3401)
## 📝 Review Mode -- Core
This PR introduces the Core implementation for Review mode:
- New op `Op::Review { prompt: String }:` spawns a child review task
with isolated context, a review‑specific system prompt, and a
`Config.review_model`.
- `EnteredReviewMode`: emitted when the child review session starts.
Every event from this point onwards reflects the review session.
- `ExitedReviewMode(Option<ReviewOutputEvent>)`: emitted when the review
finishes or is interrupted, with optional structured findings:
```json
{
"findings": [
{
"title": "<≤ 80 chars, imperative>",
"body": "<valid Markdown explaining *why* this is a problem; cite files/lines/functions>",
"confidence_score": <float 0.0-1.0>,
"priority": <int 0-3>,
"code_location": {
"absolute_file_path": "<file path>",
"line_range": {"start": <int>, "end": <int>}
}
}
],
"overall_correctness": "patch is correct" | "patch is incorrect",
"overall_explanation": "<1-3 sentence explanation justifying the overall_correctness verdict>",
"overall_confidence_score": <float 0.0-1.0>
}
```
## Questions
### Why separate out its own message history?
We want the review thread to match the training of our review models as
much as possible -- that means using a custom prompt, removing user
instructions, and starting a clean chat history.
We also want to make sure the review thread doesn't leak into the parent
thread.
### Why do this as a mode, vs. sub-agents?
1. We want review to be a synchronous task, so it's fine for now to do a
bespoke implementation.
2. We're still unclear about the final structure for sub-agents. We'd
prefer to land this quickly and then refactor into sub-agents without
rushing that implementation.
2025-09-12 16:25:10 -07:00
|
|
|
fn default_review_model() -> String {
|
|
|
|
|
OPENAI_DEFAULT_REVIEW_MODEL.to_string()
|
|
|
|
|
}
|
|
|
|
|
|
2025-05-15 00:30:13 -07:00
|
|
|
/// Returns the path to the Codex configuration directory, which can be
|
|
|
|
|
/// specified by the `CODEX_HOME` environment variable. If not set, defaults to
|
|
|
|
|
/// `~/.codex`.
|
|
|
|
|
///
|
|
|
|
|
/// - If `CODEX_HOME` is set, the value will be canonicalized and this
|
|
|
|
|
/// function will Err if the path does not exist.
|
|
|
|
|
/// - If `CODEX_HOME` is not set, this function does not verify that the
|
|
|
|
|
/// directory exists.
|
2025-07-22 15:54:33 -07:00
|
|
|
pub fn find_codex_home() -> std::io::Result<PathBuf> {
|
2025-05-15 00:30:13 -07:00
|
|
|
// Honor the `CODEX_HOME` environment variable when it is set to allow users
|
|
|
|
|
// (and tests) to override the default location.
|
2025-08-19 13:22:02 -07:00
|
|
|
if let Ok(val) = std::env::var("CODEX_HOME")
|
|
|
|
|
&& !val.is_empty()
|
|
|
|
|
{
|
|
|
|
|
return PathBuf::from(val).canonicalize();
|
2025-05-15 00:30:13 -07:00
|
|
|
}
|
|
|
|
|
|
fix: write logs to ~/.codex/log instead of /tmp (#669)
Previously, the Rust TUI was writing log files to `/tmp`, which is
world-readable and not available on Windows, so that isn't great.
This PR tries to clean things up by adding a function that provides the
path to the "Codex config dir," e.g., `~/.codex` (though I suppose we
could support `$CODEX_HOME` to override this?) and then defines other
paths in terms of the result of `codex_dir()`.
For example, `log_dir()` returns the folder where log files should be
written which is defined in terms of `codex_dir()`. I updated the TUI to
use this function. On UNIX, we even go so far as to `chmod 600` the log
file by default, though as noted in a comment, it's a bit tedious to do
the equivalent on Windows, so we just let that go for now.
This also changes the default logging level to `info` for `codex_core`
and `codex_tui` when `RUST_LOG` is not specified. I'm not really sure if
we should use a more verbose default (it may be helpful when debugging
user issues), though if so, we should probably also set up log rotation?
2025-04-25 17:37:41 -07:00
|
|
|
let mut p = home_dir().ok_or_else(|| {
|
|
|
|
|
std::io::Error::new(
|
|
|
|
|
std::io::ErrorKind::NotFound,
|
|
|
|
|
"Could not find home directory",
|
|
|
|
|
)
|
|
|
|
|
})?;
|
|
|
|
|
p.push(".codex");
|
|
|
|
|
Ok(p)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Returns the path to the folder where Codex logs are stored. Does not verify
|
|
|
|
|
/// that the directory exists.
|
2025-05-15 00:30:13 -07:00
|
|
|
pub fn log_dir(cfg: &Config) -> std::io::Result<PathBuf> {
|
|
|
|
|
let mut p = cfg.codex_home.clone();
|
fix: write logs to ~/.codex/log instead of /tmp (#669)
Previously, the Rust TUI was writing log files to `/tmp`, which is
world-readable and not available on Windows, so that isn't great.
This PR tries to clean things up by adding a function that provides the
path to the "Codex config dir," e.g., `~/.codex` (though I suppose we
could support `$CODEX_HOME` to override this?) and then defines other
paths in terms of the result of `codex_dir()`.
For example, `log_dir()` returns the folder where log files should be
written which is defined in terms of `codex_dir()`. I updated the TUI to
use this function. On UNIX, we even go so far as to `chmod 600` the log
file by default, though as noted in a comment, it's a bit tedious to do
the equivalent on Windows, so we just let that go for now.
This also changes the default logging level to `info` for `codex_core`
and `codex_tui` when `RUST_LOG` is not specified. I'm not really sure if
we should use a more verbose default (it may be helpful when debugging
user issues), though if so, we should probably also set up log rotation?
2025-04-25 17:37:41 -07:00
|
|
|
p.push("log");
|
|
|
|
|
Ok(p)
|
|
|
|
|
}
|
2025-04-29 18:42:52 -07:00
|
|
|
|
|
|
|
|
#[cfg(test)]
|
|
|
|
|
mod tests {
|
2025-05-20 11:55:25 -07:00
|
|
|
use crate::config_types::HistoryPersistence;
|
2025-09-18 11:25:09 -07:00
|
|
|
use crate::config_types::Notifications;
|
2025-05-20 11:55:25 -07:00
|
|
|
|
2025-04-29 18:42:52 -07:00
|
|
|
use super::*;
|
2025-05-13 16:52:52 -07:00
|
|
|
use pretty_assertions::assert_eq;
|
2025-09-11 15:04:29 -07:00
|
|
|
|
2025-09-22 10:30:59 -07:00
|
|
|
use std::time::Duration;
|
2025-05-13 16:52:52 -07:00
|
|
|
use tempfile::TempDir;
|
2025-04-29 18:42:52 -07:00
|
|
|
|
feat: record messages from user in ~/.codex/history.jsonl (#939)
This is a large change to support a "history" feature like you would
expect in a shell like Bash.
History events are recorded in `$CODEX_HOME/history.jsonl`. Because it
is a JSONL file, it is straightforward to append new entries (as opposed
to the TypeScript file that uses `$CODEX_HOME/history.json`, so to be
valid JSON, each new entry entails rewriting the entire file). Because
it is possible for there to be multiple instances of Codex CLI writing
to `history.jsonl` at once, we use advisory file locking when working
with `history.jsonl` in `codex-rs/core/src/message_history.rs`.
Because we believe history is a sufficiently useful feature, we enable
it by default. Though to provide some safety, we set the file
permissions of `history.jsonl` to be `o600` so that other users on the
system cannot read the user's history. We do not yet support a default
list of `SENSITIVE_PATTERNS` as the TypeScript CLI does:
https://github.com/openai/codex/blob/3fdf9df1335ac9501e3fb0e61715359145711e8b/codex-cli/src/utils/storage/command-history.ts#L10-L17
We are going to take a more conservative approach to this list in the
Rust CLI. For example, while `/\b[A-Za-z0-9-_]{20,}\b/` might exclude
sensitive information like API tokens, it would also exclude valuable
information such as references to Git commits.
As noted in the updated documentation, users can opt-out of history by
adding the following to `config.toml`:
```toml
[history]
persistence = "none"
```
Because `history.jsonl` could, in theory, be quite large, we take a[n
arguably overly pedantic] approach in reading history entries into
memory. Specifically, we start by telling the client the current number
of entries in the history file (`history_entry_count`) as well as the
inode (`history_log_id`) of `history.jsonl` (see the new fields on
`SessionConfiguredEvent`).
The client is responsible for keeping new entries in memory to create a
"local history," but if the user hits up enough times to go "past" the
end of local history, then the client should use the new
`GetHistoryEntryRequest` in the protocol to fetch older entries.
Specifically, it should pass the `history_log_id` it was given
originally and work backwards from `history_entry_count`. (It should
really fetch history in batches rather than one-at-a-time, but that is
something we can improve upon in subsequent PRs.)
The motivation behind this crazy scheme is that it is designed to defend
against:
* The `history.jsonl` being truncated during the session such that the
index into the history is no longer consistent with what had been read
up to that point. We do not yet have logic to enforce a `max_bytes` for
`history.jsonl`, but once we do, we will aspire to implement it in a way
that should result in a new inode for the file on most systems.
* New items from concurrent Codex CLI sessions amending to the history.
Because, in absence of truncation, `history.jsonl` is an append-only
log, so long as the client reads backwards from `history_entry_count`,
it should always get a consistent view of history. (That said, it will
not be able to read _new_ commands from concurrent sessions, but perhaps
we will introduce a `/` command to reload latest history or something
down the road.)
Admittedly, my testing of this feature thus far has been fairly light. I
expect we will find bugs and introduce enhancements/fixes going forward.
2025-05-15 16:26:23 -07:00
|
|
|
/// `[history]` tables in `config.toml` should deserialize into the matching
/// `History` values.
#[test]
fn test_toml_parsing() {
    // persistence = "save-all" maps to HistoryPersistence::SaveAll.
    let cfg_save_all = toml::from_str::<ConfigToml>(
        r#"
[history]
persistence = "save-all"
"#,
    )
    .expect("TOML deserialization should succeed");
    assert_eq!(
        cfg_save_all.history,
        Some(History {
            persistence: HistoryPersistence::SaveAll,
            max_bytes: None,
        }),
    );

    // persistence = "none" maps to HistoryPersistence::None.
    let cfg_none = toml::from_str::<ConfigToml>(
        r#"
[history]
persistence = "none"
"#,
    )
    .expect("TOML deserialization should succeed");
    assert_eq!(
        cfg_none.history,
        Some(History {
            persistence: HistoryPersistence::None,
            max_bytes: None,
        }),
    );
}
|
|
|
|
|
|
2025-09-18 11:25:09 -07:00
|
|
|
/// A `[tui]` table without a `notifications` key should default to disabled
/// notifications.
#[test]
fn tui_config_missing_notifications_field_defaults_to_disabled() {
    let parsed = toml::from_str::<ConfigToml>(
        r#"
[tui]
"#,
    )
    .expect("TUI config without notifications should succeed");

    let tui = parsed.tui.expect("config should include tui section");
    assert_eq!(tui.notifications, Notifications::Enabled(false));
}
|
|
|
|
|
|
2025-04-29 18:42:52 -07:00
|
|
|
#[test]
|
feat: redesign sandbox config (#1373)
This is a major redesign of how sandbox configuration works and aims to
fix https://github.com/openai/codex/issues/1248. Specifically, it
replaces `sandbox_permissions` in `config.toml` (and the
`-s`/`--sandbox-permission` CLI flags) with a "table" with effectively
three variants:
```toml
# Safest option: full disk is read-only, but writes and network access are disallowed.
[sandbox]
mode = "read-only"
# The cwd of the Codex task is writable, as well as $TMPDIR on macOS.
# writable_roots can be used to specify additional writable folders.
[sandbox]
mode = "workspace-write"
writable_roots = [] # Optional, defaults to the empty list.
network_access = false # Optional, defaults to false.
# Disable sandboxing: use at your own risk!!!
[sandbox]
mode = "danger-full-access"
```
This should make sandboxing easier to reason about. While we have
dropped support for `-s`, the way it works now is:
- no flags => `read-only`
- `--full-auto` => `workspace-write`
- currently, there is no way to specify `danger-full-access` via a CLI
flag, but we will revisit that as part of
https://github.com/openai/codex/issues/1254
Outstanding issue:
- As noted in the `TODO` on `SandboxPolicy::is_unrestricted()`, we are
still conflating sandbox preferences with approval preferences in that
case, which needs to be cleaned up.
2025-06-24 16:59:47 -07:00
|
|
|
fn test_sandbox_config_parsing() {
|
|
|
|
|
let sandbox_full_access = r#"
|
feat: add support for --sandbox flag (#1476)
On a high-level, we try to design `config.toml` so that you don't have
to "comment out a lot of stuff" when testing different options.
Previously, defining a sandbox policy was somewhat at odds with this
principle because you would define the policy as attributes of
`[sandbox]` like so:
```toml
[sandbox]
mode = "workspace-write"
writable_roots = [ "/tmp" ]
```
but if you wanted to temporarily change to a read-only sandbox, you
might feel compelled to modify your file to be:
```toml
[sandbox]
mode = "read-only"
# mode = "workspace-write"
# writable_roots = [ "/tmp" ]
```
Technically, commenting out `writable_roots` would not be strictly
necessary, as `mode = "read-only"` would ignore `writable_roots`, but
it's still a reasonable thing to do to keep things tidy.
Currently, the various values for `mode` do not support that many
attributes, so this is not that hard to maintain, but one could imagine
this becoming more complex in the future.
In this PR, we change Codex CLI so that it no longer recognizes
`[sandbox]`. Instead, it introduces a top-level option, `sandbox_mode`,
and `[sandbox_workspace_write]` is used to further configure the sandbox
when when `sandbox_mode = "workspace-write"` is used:
```toml
sandbox_mode = "workspace-write"
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
```
This feels a bit more future-proof in that it is less tedious to
configure different sandboxes:
```toml
sandbox_mode = "workspace-write"
[sandbox_read_only]
# read-only options here...
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
[sandbox_danger_full_access]
# danger-full-access options here...
```
In this scheme, you never need to comment out the configuration for an
individual sandbox type: you only need to redefine `sandbox_mode`.
Relatedly, previous to this change, a user had to do `-c
sandbox.mode=read-only` to change the mode on the command line. With
this change, things are arguably a bit cleaner because the equivalent
option is `-c sandbox_mode=read-only` (and now `-c
sandbox_workspace_write=...` can be set separately).
Though more importantly, we introduce the `-s/--sandbox` option to the
CLI, which maps directly to `sandbox_mode` in `config.toml`, making
config override behavior easier to reason about. Moreover, as you can
see in the updates to the various Markdown files, it is much easier to
explain how to configure sandboxing when things like `--sandbox
read-only` can be used as an example.
Relatedly, this cleanup also made it straightforward to add support for
a `sandbox` option for Codex when used as an MCP server (see the changes
to `mcp-server/src/codex_tool_config.rs`).
Fixes https://github.com/openai/codex/issues/1248.
2025-07-07 22:31:30 -07:00
|
|
|
sandbox_mode = "danger-full-access"
|
|
|
|
|
|
|
|
|
|
[sandbox_workspace_write]
|
feat: redesign sandbox config (#1373)
This is a major redesign of how sandbox configuration works and aims to
fix https://github.com/openai/codex/issues/1248. Specifically, it
replaces `sandbox_permissions` in `config.toml` (and the
`-s`/`--sandbox-permission` CLI flags) with a "table" with effectively
three variants:
```toml
# Safest option: full disk is read-only, but writes and network access are disallowed.
[sandbox]
mode = "read-only"
# The cwd of the Codex task is writable, as well as $TMPDIR on macOS.
# writable_roots can be used to specify additional writable folders.
[sandbox]
mode = "workspace-write"
writable_roots = [] # Optional, defaults to the empty list.
network_access = false # Optional, defaults to false.
# Disable sandboxing: use at your own risk!!!
[sandbox]
mode = "danger-full-access"
```
This should make sandboxing easier to reason about. While we have
dropped support for `-s`, the way it works now is:
- no flags => `read-only`
- `--full-auto` => `workspace-write`
- currently, there is no way to specify `danger-full-access` via a CLI
flag, but we will revisit that as part of
https://github.com/openai/codex/issues/1254
Outstanding issue:
- As noted in the `TODO` on `SandboxPolicy::is_unrestricted()`, we are
still conflating sandbox preferences with approval preferences in that
case, which needs to be cleaned up.
2025-06-24 16:59:47 -07:00
|
|
|
network_access = false # This should be ignored.
|
|
|
|
|
"#;
|
|
|
|
|
let sandbox_full_access_cfg = toml::from_str::<ConfigToml>(sandbox_full_access)
|
|
|
|
|
.expect("TOML deserialization should succeed");
|
feat: add support for --sandbox flag (#1476)
On a high-level, we try to design `config.toml` so that you don't have
to "comment out a lot of stuff" when testing different options.
Previously, defining a sandbox policy was somewhat at odds with this
principle because you would define the policy as attributes of
`[sandbox]` like so:
```toml
[sandbox]
mode = "workspace-write"
writable_roots = [ "/tmp" ]
```
but if you wanted to temporarily change to a read-only sandbox, you
might feel compelled to modify your file to be:
```toml
[sandbox]
mode = "read-only"
# mode = "workspace-write"
# writable_roots = [ "/tmp" ]
```
Technically, commenting out `writable_roots` would not be strictly
necessary, as `mode = "read-only"` would ignore `writable_roots`, but
it's still a reasonable thing to do to keep things tidy.
Currently, the various values for `mode` do not support that many
attributes, so this is not that hard to maintain, but one could imagine
this becoming more complex in the future.
In this PR, we change Codex CLI so that it no longer recognizes
`[sandbox]`. Instead, it introduces a top-level option, `sandbox_mode`,
and `[sandbox_workspace_write]` is used to further configure the sandbox
when when `sandbox_mode = "workspace-write"` is used:
```toml
sandbox_mode = "workspace-write"
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
```
This feels a bit more future-proof in that it is less tedious to
configure different sandboxes:
```toml
sandbox_mode = "workspace-write"
[sandbox_read_only]
# read-only options here...
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
[sandbox_danger_full_access]
# danger-full-access options here...
```
In this scheme, you never need to comment out the configuration for an
individual sandbox type: you only need to redefine `sandbox_mode`.
Relatedly, previous to this change, a user had to do `-c
sandbox.mode=read-only` to change the mode on the command line. With
this change, things are arguably a bit cleaner because the equivalent
option is `-c sandbox_mode=read-only` (and now `-c
sandbox_workspace_write=...` can be set separately).
Though more importantly, we introduce the `-s/--sandbox` option to the
CLI, which maps directly to `sandbox_mode` in `config.toml`, making
config override behavior easier to reason about. Moreover, as you can
see in the updates to the various Markdown files, it is much easier to
explain how to configure sandboxing when things like `--sandbox
read-only` can be used as an example.
Relatedly, this cleanup also made it straightforward to add support for
a `sandbox` option for Codex when used as an MCP server (see the changes
to `mcp-server/src/codex_tool_config.rs`).
Fixes https://github.com/openai/codex/issues/1248.
2025-07-07 22:31:30 -07:00
|
|
|
let sandbox_mode_override = None;
|
feat: redesign sandbox config (#1373)
This is a major redesign of how sandbox configuration works and aims to
fix https://github.com/openai/codex/issues/1248. Specifically, it
replaces `sandbox_permissions` in `config.toml` (and the
`-s`/`--sandbox-permission` CLI flags) with a "table" with effectively
three variants:
```toml
# Safest option: full disk is read-only, but writes and network access are disallowed.
[sandbox]
mode = "read-only"
# The cwd of the Codex task is writable, as well as $TMPDIR on macOS.
# writable_roots can be used to specify additional writable folders.
[sandbox]
mode = "workspace-write"
writable_roots = [] # Optional, defaults to the empty list.
network_access = false # Optional, defaults to false.
# Disable sandboxing: use at your own risk!!!
[sandbox]
mode = "danger-full-access"
```
This should make sandboxing easier to reason about. While we have
dropped support for `-s`, the way it works now is:
- no flags => `read-only`
- `--full-auto` => `workspace-write`
- currently, there is no way to specify `danger-full-access` via a CLI
flag, but we will revisit that as part of
https://github.com/openai/codex/issues/1254
Outstanding issue:
- As noted in the `TODO` on `SandboxPolicy::is_unrestricted()`, we are
still conflating sandbox preferences with approval preferences in that
case, which needs to be cleaned up.
2025-06-24 16:59:47 -07:00
|
|
|
assert_eq!(
|
feat: add support for --sandbox flag (#1476)
On a high-level, we try to design `config.toml` so that you don't have
to "comment out a lot of stuff" when testing different options.
Previously, defining a sandbox policy was somewhat at odds with this
principle because you would define the policy as attributes of
`[sandbox]` like so:
```toml
[sandbox]
mode = "workspace-write"
writable_roots = [ "/tmp" ]
```
but if you wanted to temporarily change to a read-only sandbox, you
might feel compelled to modify your file to be:
```toml
[sandbox]
mode = "read-only"
# mode = "workspace-write"
# writable_roots = [ "/tmp" ]
```
Technically, commenting out `writable_roots` would not be strictly
necessary, as `mode = "read-only"` would ignore `writable_roots`, but
it's still a reasonable thing to do to keep things tidy.
Currently, the various values for `mode` do not support that many
attributes, so this is not that hard to maintain, but one could imagine
this becoming more complex in the future.
In this PR, we change Codex CLI so that it no longer recognizes
`[sandbox]`. Instead, it introduces a top-level option, `sandbox_mode`,
and `[sandbox_workspace_write]` is used to further configure the sandbox
when when `sandbox_mode = "workspace-write"` is used:
```toml
sandbox_mode = "workspace-write"
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
```
This feels a bit more future-proof in that it is less tedious to
configure different sandboxes:
```toml
sandbox_mode = "workspace-write"
[sandbox_read_only]
# read-only options here...
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
[sandbox_danger_full_access]
# danger-full-access options here...
```
In this scheme, you never need to comment out the configuration for an
individual sandbox type: you only need to redefine `sandbox_mode`.
Relatedly, previous to this change, a user had to do `-c
sandbox.mode=read-only` to change the mode on the command line. With
this change, things are arguably a bit cleaner because the equivalent
option is `-c sandbox_mode=read-only` (and now `-c
sandbox_workspace_write=...` can be set separately).
Though more importantly, we introduce the `-s/--sandbox` option to the
CLI, which maps directly to `sandbox_mode` in `config.toml`, making
config override behavior easier to reason about. Moreover, as you can
see in the updates to the various Markdown files, it is much easier to
explain how to configure sandboxing when things like `--sandbox
read-only` can be used as an example.
Relatedly, this cleanup also made it straightforward to add support for
a `sandbox` option for Codex when used as an MCP server (see the changes
to `mcp-server/src/codex_tool_config.rs`).
Fixes https://github.com/openai/codex/issues/1248.
2025-07-07 22:31:30 -07:00
|
|
|
SandboxPolicy::DangerFullAccess,
|
|
|
|
|
sandbox_full_access_cfg.derive_sandbox_policy(sandbox_mode_override)
|
feat: redesign sandbox config (#1373)
This is a major redesign of how sandbox configuration works and aims to
fix https://github.com/openai/codex/issues/1248. Specifically, it
replaces `sandbox_permissions` in `config.toml` (and the
`-s`/`--sandbox-permission` CLI flags) with a "table" with effectively
three variants:
```toml
# Safest option: full disk is read-only, but writes and network access are disallowed.
[sandbox]
mode = "read-only"
# The cwd of the Codex task is writable, as well as $TMPDIR on macOS.
# writable_roots can be used to specify additional writable folders.
[sandbox]
mode = "workspace-write"
writable_roots = [] # Optional, defaults to the empty list.
network_access = false # Optional, defaults to false.
# Disable sandboxing: use at your own risk!!!
[sandbox]
mode = "danger-full-access"
```
This should make sandboxing easier to reason about. While we have
dropped support for `-s`, the way it works now is:
- no flags => `read-only`
- `--full-auto` => `workspace-write`
- currently, there is no way to specify `danger-full-access` via a CLI
flag, but we will revisit that as part of
https://github.com/openai/codex/issues/1254
Outstanding issue:
- As noted in the `TODO` on `SandboxPolicy::is_unrestricted()`, we are
still conflating sandbox preferences with approval preferences in that
case, which needs to be cleaned up.
2025-06-24 16:59:47 -07:00
|
|
|
);
|
2025-04-29 18:42:52 -07:00
|
|
|
|
feat: redesign sandbox config (#1373)
This is a major redesign of how sandbox configuration works and aims to
fix https://github.com/openai/codex/issues/1248. Specifically, it
replaces `sandbox_permissions` in `config.toml` (and the
`-s`/`--sandbox-permission` CLI flags) with a "table" with effectively
three variants:
```toml
# Safest option: full disk is read-only, but writes and network access are disallowed.
[sandbox]
mode = "read-only"
# The cwd of the Codex task is writable, as well as $TMPDIR on macOS.
# writable_roots can be used to specify additional writable folders.
[sandbox]
mode = "workspace-write"
writable_roots = [] # Optional, defaults to the empty list.
network_access = false # Optional, defaults to false.
# Disable sandboxing: use at your own risk!!!
[sandbox]
mode = "danger-full-access"
```
This should make sandboxing easier to reason about. While we have
dropped support for `-s`, the way it works now is:
- no flags => `read-only`
- `--full-auto` => `workspace-write`
- currently, there is no way to specify `danger-full-access` via a CLI
flag, but we will revisit that as part of
https://github.com/openai/codex/issues/1254
Outstanding issue:
- As noted in the `TODO` on `SandboxPolicy::is_unrestricted()`, we are
still conflating sandbox preferences with approval preferences in that
case, which needs to be cleaned up.
2025-06-24 16:59:47 -07:00
|
|
|
let sandbox_read_only = r#"
|
feat: add support for --sandbox flag (#1476)
On a high-level, we try to design `config.toml` so that you don't have
to "comment out a lot of stuff" when testing different options.
Previously, defining a sandbox policy was somewhat at odds with this
principle because you would define the policy as attributes of
`[sandbox]` like so:
```toml
[sandbox]
mode = "workspace-write"
writable_roots = [ "/tmp" ]
```
but if you wanted to temporarily change to a read-only sandbox, you
might feel compelled to modify your file to be:
```toml
[sandbox]
mode = "read-only"
# mode = "workspace-write"
# writable_roots = [ "/tmp" ]
```
Technically, commenting out `writable_roots` would not be strictly
necessary, as `mode = "read-only"` would ignore `writable_roots`, but
it's still a reasonable thing to do to keep things tidy.
Currently, the various values for `mode` do not support that many
attributes, so this is not that hard to maintain, but one could imagine
this becoming more complex in the future.
In this PR, we change Codex CLI so that it no longer recognizes
`[sandbox]`. Instead, it introduces a top-level option, `sandbox_mode`,
and `[sandbox_workspace_write]` is used to further configure the sandbox
when when `sandbox_mode = "workspace-write"` is used:
```toml
sandbox_mode = "workspace-write"
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
```
This feels a bit more future-proof in that it is less tedious to
configure different sandboxes:
```toml
sandbox_mode = "workspace-write"
[sandbox_read_only]
# read-only options here...
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
[sandbox_danger_full_access]
# danger-full-access options here...
```
In this scheme, you never need to comment out the configuration for an
individual sandbox type: you only need to redefine `sandbox_mode`.
Relatedly, previous to this change, a user had to do `-c
sandbox.mode=read-only` to change the mode on the command line. With
this change, things are arguably a bit cleaner because the equivalent
option is `-c sandbox_mode=read-only` (and now `-c
sandbox_workspace_write=...` can be set separately).
Though more importantly, we introduce the `-s/--sandbox` option to the
CLI, which maps directly to `sandbox_mode` in `config.toml`, making
config override behavior easier to reason about. Moreover, as you can
see in the updates to the various Markdown files, it is much easier to
explain how to configure sandboxing when things like `--sandbox
read-only` can be used as an example.
Relatedly, this cleanup also made it straightforward to add support for
a `sandbox` option for Codex when used as an MCP server (see the changes
to `mcp-server/src/codex_tool_config.rs`).
Fixes https://github.com/openai/codex/issues/1248.
2025-07-07 22:31:30 -07:00
|
|
|
sandbox_mode = "read-only"
|
|
|
|
|
|
|
|
|
|
[sandbox_workspace_write]
|
feat: redesign sandbox config (#1373)
This is a major redesign of how sandbox configuration works and aims to
fix https://github.com/openai/codex/issues/1248. Specifically, it
replaces `sandbox_permissions` in `config.toml` (and the
`-s`/`--sandbox-permission` CLI flags) with a "table" with effectively
three variants:
```toml
# Safest option: full disk is read-only, but writes and network access are disallowed.
[sandbox]
mode = "read-only"
# The cwd of the Codex task is writable, as well as $TMPDIR on macOS.
# writable_roots can be used to specify additional writable folders.
[sandbox]
mode = "workspace-write"
writable_roots = [] # Optional, defaults to the empty list.
network_access = false # Optional, defaults to false.
# Disable sandboxing: use at your own risk!!!
[sandbox]
mode = "danger-full-access"
```
This should make sandboxing easier to reason about. While we have
dropped support for `-s`, the way it works now is:
- no flags => `read-only`
- `--full-auto` => `workspace-write`
- currently, there is no way to specify `danger-full-access` via a CLI
flag, but we will revisit that as part of
https://github.com/openai/codex/issues/1254
Outstanding issue:
- As noted in the `TODO` on `SandboxPolicy::is_unrestricted()`, we are
still conflating sandbox preferences with approval preferences in that
case, which needs to be cleaned up.
2025-06-24 16:59:47 -07:00
|
|
|
network_access = true # This should be ignored.
|
|
|
|
|
"#;
|
|
|
|
|
|
|
|
|
|
let sandbox_read_only_cfg = toml::from_str::<ConfigToml>(sandbox_read_only)
|
|
|
|
|
.expect("TOML deserialization should succeed");
|
feat: add support for --sandbox flag (#1476)
On a high-level, we try to design `config.toml` so that you don't have
to "comment out a lot of stuff" when testing different options.
Previously, defining a sandbox policy was somewhat at odds with this
principle because you would define the policy as attributes of
`[sandbox]` like so:
```toml
[sandbox]
mode = "workspace-write"
writable_roots = [ "/tmp" ]
```
but if you wanted to temporarily change to a read-only sandbox, you
might feel compelled to modify your file to be:
```toml
[sandbox]
mode = "read-only"
# mode = "workspace-write"
# writable_roots = [ "/tmp" ]
```
Technically, commenting out `writable_roots` would not be strictly
necessary, as `mode = "read-only"` would ignore `writable_roots`, but
it's still a reasonable thing to do to keep things tidy.
Currently, the various values for `mode` do not support that many
attributes, so this is not that hard to maintain, but one could imagine
this becoming more complex in the future.
In this PR, we change Codex CLI so that it no longer recognizes
`[sandbox]`. Instead, it introduces a top-level option, `sandbox_mode`,
and `[sandbox_workspace_write]` is used to further configure the sandbox
when when `sandbox_mode = "workspace-write"` is used:
```toml
sandbox_mode = "workspace-write"
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
```
This feels a bit more future-proof in that it is less tedious to
configure different sandboxes:
```toml
sandbox_mode = "workspace-write"
[sandbox_read_only]
# read-only options here...
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
[sandbox_danger_full_access]
# danger-full-access options here...
```
In this scheme, you never need to comment out the configuration for an
individual sandbox type: you only need to redefine `sandbox_mode`.
Relatedly, previous to this change, a user had to do `-c
sandbox.mode=read-only` to change the mode on the command line. With
this change, things are arguably a bit cleaner because the equivalent
option is `-c sandbox_mode=read-only` (and now `-c
sandbox_workspace_write=...` can be set separately).
Though more importantly, we introduce the `-s/--sandbox` option to the
CLI, which maps directly to `sandbox_mode` in `config.toml`, making
config override behavior easier to reason about. Moreover, as you can
see in the updates to the various Markdown files, it is much easier to
explain how to configure sandboxing when things like `--sandbox
read-only` can be used as an example.
Relatedly, this cleanup also made it straightforward to add support for
a `sandbox` option for Codex when used as an MCP server (see the changes
to `mcp-server/src/codex_tool_config.rs`).
Fixes https://github.com/openai/codex/issues/1248.
2025-07-07 22:31:30 -07:00
|
|
|
let sandbox_mode_override = None;
|
|
|
|
|
assert_eq!(
|
|
|
|
|
SandboxPolicy::ReadOnly,
|
|
|
|
|
sandbox_read_only_cfg.derive_sandbox_policy(sandbox_mode_override)
|
|
|
|
|
);
|
feat: redesign sandbox config (#1373)
This is a major redesign of how sandbox configuration works and aims to
fix https://github.com/openai/codex/issues/1248. Specifically, it
replaces `sandbox_permissions` in `config.toml` (and the
`-s`/`--sandbox-permission` CLI flags) with a "table" with effectively
three variants:
```toml
# Safest option: full disk is read-only, but writes and network access are disallowed.
[sandbox]
mode = "read-only"
# The cwd of the Codex task is writable, as well as $TMPDIR on macOS.
# writable_roots can be used to specify additional writable folders.
[sandbox]
mode = "workspace-write"
writable_roots = [] # Optional, defaults to the empty list.
network_access = false # Optional, defaults to false.
# Disable sandboxing: use at your own risk!!!
[sandbox]
mode = "danger-full-access"
```
This should make sandboxing easier to reason about. While we have
dropped support for `-s`, the way it works now is:
- no flags => `read-only`
- `--full-auto` => `workspace-write`
- currently, there is no way to specify `danger-full-access` via a CLI
flag, but we will revisit that as part of
https://github.com/openai/codex/issues/1254
Outstanding issue:
- As noted in the `TODO` on `SandboxPolicy::is_unrestricted()`, we are
still conflating sandbox preferences with approval preferences in that
case, which needs to be cleaned up.
2025-06-24 16:59:47 -07:00
|
|
|
|
|
|
|
|
let sandbox_workspace_write = r#"
|
feat: add support for --sandbox flag (#1476)
On a high-level, we try to design `config.toml` so that you don't have
to "comment out a lot of stuff" when testing different options.
Previously, defining a sandbox policy was somewhat at odds with this
principle because you would define the policy as attributes of
`[sandbox]` like so:
```toml
[sandbox]
mode = "workspace-write"
writable_roots = [ "/tmp" ]
```
but if you wanted to temporarily change to a read-only sandbox, you
might feel compelled to modify your file to be:
```toml
[sandbox]
mode = "read-only"
# mode = "workspace-write"
# writable_roots = [ "/tmp" ]
```
Technically, commenting out `writable_roots` would not be strictly
necessary, as `mode = "read-only"` would ignore `writable_roots`, but
it's still a reasonable thing to do to keep things tidy.
Currently, the various values for `mode` do not support that many
attributes, so this is not that hard to maintain, but one could imagine
this becoming more complex in the future.
In this PR, we change Codex CLI so that it no longer recognizes
`[sandbox]`. Instead, it introduces a top-level option, `sandbox_mode`,
and `[sandbox_workspace_write]` is used to further configure the sandbox
when when `sandbox_mode = "workspace-write"` is used:
```toml
sandbox_mode = "workspace-write"
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
```
This feels a bit more future-proof in that it is less tedious to
configure different sandboxes:
```toml
sandbox_mode = "workspace-write"
[sandbox_read_only]
# read-only options here...
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
[sandbox_danger_full_access]
# danger-full-access options here...
```
In this scheme, you never need to comment out the configuration for an
individual sandbox type: you only need to redefine `sandbox_mode`.
Relatedly, previous to this change, a user had to do `-c
sandbox.mode=read-only` to change the mode on the command line. With
this change, things are arguably a bit cleaner because the equivalent
option is `-c sandbox_mode=read-only` (and now `-c
sandbox_workspace_write=...` can be set separately).
Though more importantly, we introduce the `-s/--sandbox` option to the
CLI, which maps directly to `sandbox_mode` in `config.toml`, making
config override behavior easier to reason about. Moreover, as you can
see in the updates to the various Markdown files, it is much easier to
explain how to configure sandboxing when things like `--sandbox
read-only` can be used as an example.
Relatedly, this cleanup also made it straightforward to add support for
a `sandbox` option for Codex when used as an MCP server (see the changes
to `mcp-server/src/codex_tool_config.rs`).
Fixes https://github.com/openai/codex/issues/1248.
2025-07-07 22:31:30 -07:00
|
|
|
sandbox_mode = "workspace-write"
|
|
|
|
|
|
|
|
|
|
[sandbox_workspace_write]
|
feat: redesign sandbox config (#1373)
This is a major redesign of how sandbox configuration works and aims to
fix https://github.com/openai/codex/issues/1248. Specifically, it
replaces `sandbox_permissions` in `config.toml` (and the
`-s`/`--sandbox-permission` CLI flags) with a "table" with effectively
three variants:
```toml
# Safest option: full disk is read-only, but writes and network access are disallowed.
[sandbox]
mode = "read-only"
# The cwd of the Codex task is writable, as well as $TMPDIR on macOS.
# writable_roots can be used to specify additional writable folders.
[sandbox]
mode = "workspace-write"
writable_roots = [] # Optional, defaults to the empty list.
network_access = false # Optional, defaults to false.
# Disable sandboxing: use at your own risk!!!
[sandbox]
mode = "danger-full-access"
```
This should make sandboxing easier to reason about. While we have
dropped support for `-s`, the way it works now is:
- no flags => `read-only`
- `--full-auto` => `workspace-write`
- currently, there is no way to specify `danger-full-access` via a CLI
flag, but we will revisit that as part of
https://github.com/openai/codex/issues/1254
Outstanding issue:
- As noted in the `TODO` on `SandboxPolicy::is_unrestricted()`, we are
still conflating sandbox preferences with approval preferences in that
case, which needs to be cleaned up.
2025-06-24 16:59:47 -07:00
|
|
|
writable_roots = [
|
2025-08-07 00:17:00 -07:00
|
|
|
"/my/workspace",
|
feat: redesign sandbox config (#1373)
This is a major redesign of how sandbox configuration works and aims to
fix https://github.com/openai/codex/issues/1248. Specifically, it
replaces `sandbox_permissions` in `config.toml` (and the
`-s`/`--sandbox-permission` CLI flags) with a "table" with effectively
three variants:
```toml
# Safest option: full disk is read-only, but writes and network access are disallowed.
[sandbox]
mode = "read-only"
# The cwd of the Codex task is writable, as well as $TMPDIR on macOS.
# writable_roots can be used to specify additional writable folders.
[sandbox]
mode = "workspace-write"
writable_roots = [] # Optional, defaults to the empty list.
network_access = false # Optional, defaults to false.
# Disable sandboxing: use at your own risk!!!
[sandbox]
mode = "danger-full-access"
```
This should make sandboxing easier to reason about. While we have
dropped support for `-s`, the way it works now is:
- no flags => `read-only`
- `--full-auto` => `workspace-write`
- currently, there is no way to specify `danger-full-access` via a CLI
flag, but we will revisit that as part of
https://github.com/openai/codex/issues/1254
Outstanding issue:
- As noted in the `TODO` on `SandboxPolicy::is_unrestricted()`, we are
still conflating sandbox preferences with approval preferences in that
case, which needs to be cleaned up.
2025-06-24 16:59:47 -07:00
|
|
|
]
|
2025-08-07 00:17:00 -07:00
|
|
|
exclude_tmpdir_env_var = true
|
|
|
|
|
exclude_slash_tmp = true
|
feat: redesign sandbox config (#1373)
This is a major redesign of how sandbox configuration works and aims to
fix https://github.com/openai/codex/issues/1248. Specifically, it
replaces `sandbox_permissions` in `config.toml` (and the
`-s`/`--sandbox-permission` CLI flags) with a "table" with effectively
three variants:
```toml
# Safest option: full disk is read-only, but writes and network access are disallowed.
[sandbox]
mode = "read-only"
# The cwd of the Codex task is writable, as well as $TMPDIR on macOS.
# writable_roots can be used to specify additional writable folders.
[sandbox]
mode = "workspace-write"
writable_roots = [] # Optional, defaults to the empty list.
network_access = false # Optional, defaults to false.
# Disable sandboxing: use at your own risk!!!
[sandbox]
mode = "danger-full-access"
```
This should make sandboxing easier to reason about. While we have
dropped support for `-s`, the way it works now is:
- no flags => `read-only`
- `--full-auto` => `workspace-write`
- currently, there is no way to specify `danger-full-access` via a CLI
flag, but we will revisit that as part of
https://github.com/openai/codex/issues/1254
Outstanding issue:
- As noted in the `TODO` on `SandboxPolicy::is_unrestricted()`, we are
still conflating sandbox preferences with approval preferences in that
case, which needs to be cleaned up.
2025-06-24 16:59:47 -07:00
|
|
|
"#;
|
2025-04-29 18:42:52 -07:00
|
|
|
|
feat: redesign sandbox config (#1373)
This is a major redesign of how sandbox configuration works and aims to
fix https://github.com/openai/codex/issues/1248. Specifically, it
replaces `sandbox_permissions` in `config.toml` (and the
`-s`/`--sandbox-permission` CLI flags) with a "table" with effectively
three variants:
```toml
# Safest option: full disk is read-only, but writes and network access are disallowed.
[sandbox]
mode = "read-only"
# The cwd of the Codex task is writable, as well as $TMPDIR on macOS.
# writable_roots can be used to specify additional writable folders.
[sandbox]
mode = "workspace-write"
writable_roots = [] # Optional, defaults to the empty list.
network_access = false # Optional, defaults to false.
# Disable sandboxing: use at your own risk!!!
[sandbox]
mode = "danger-full-access"
```
This should make sandboxing easier to reason about. While we have
dropped support for `-s`, the way it works now is:
- no flags => `read-only`
- `--full-auto` => `workspace-write`
- currently, there is no way to specify `danger-full-access` via a CLI
flag, but we will revisit that as part of
https://github.com/openai/codex/issues/1254
Outstanding issue:
- As noted in the `TODO` on `SandboxPolicy::is_unrestricted()`, we are
still conflating sandbox preferences with approval preferences in that
case, which needs to be cleaned up.
2025-06-24 16:59:47 -07:00
|
|
|
let sandbox_workspace_write_cfg = toml::from_str::<ConfigToml>(sandbox_workspace_write)
|
|
|
|
|
.expect("TOML deserialization should succeed");
|
feat: add support for --sandbox flag (#1476)
On a high-level, we try to design `config.toml` so that you don't have
to "comment out a lot of stuff" when testing different options.
Previously, defining a sandbox policy was somewhat at odds with this
principle because you would define the policy as attributes of
`[sandbox]` like so:
```toml
[sandbox]
mode = "workspace-write"
writable_roots = [ "/tmp" ]
```
but if you wanted to temporarily change to a read-only sandbox, you
might feel compelled to modify your file to be:
```toml
[sandbox]
mode = "read-only"
# mode = "workspace-write"
# writable_roots = [ "/tmp" ]
```
Technically, commenting out `writable_roots` would not be strictly
necessary, as `mode = "read-only"` would ignore `writable_roots`, but
it's still a reasonable thing to do to keep things tidy.
Currently, the various values for `mode` do not support that many
attributes, so this is not that hard to maintain, but one could imagine
this becoming more complex in the future.
In this PR, we change Codex CLI so that it no longer recognizes
`[sandbox]`. Instead, it introduces a top-level option, `sandbox_mode`,
and `[sandbox_workspace_write]` is used to further configure the sandbox
when when `sandbox_mode = "workspace-write"` is used:
```toml
sandbox_mode = "workspace-write"
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
```
This feels a bit more future-proof in that it is less tedious to
configure different sandboxes:
```toml
sandbox_mode = "workspace-write"
[sandbox_read_only]
# read-only options here...
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
[sandbox_danger_full_access]
# danger-full-access options here...
```
In this scheme, you never need to comment out the configuration for an
individual sandbox type: you only need to redefine `sandbox_mode`.
Relatedly, previous to this change, a user had to do `-c
sandbox.mode=read-only` to change the mode on the command line. With
this change, things are arguably a bit cleaner because the equivalent
option is `-c sandbox_mode=read-only` (and now `-c
sandbox_workspace_write=...` can be set separately).
Though more importantly, we introduce the `-s/--sandbox` option to the
CLI, which maps directly to `sandbox_mode` in `config.toml`, making
config override behavior easier to reason about. Moreover, as you can
see in the updates to the various Markdown files, it is much easier to
explain how to configure sandboxing when things like `--sandbox
read-only` can be used as an example.
Relatedly, this cleanup also made it straightforward to add support for
a `sandbox` option for Codex when used as an MCP server (see the changes
to `mcp-server/src/codex_tool_config.rs`).
Fixes https://github.com/openai/codex/issues/1248.
2025-07-07 22:31:30 -07:00
|
|
|
let sandbox_mode_override = None;
|
feat: redesign sandbox config (#1373)
This is a major redesign of how sandbox configuration works and aims to
fix https://github.com/openai/codex/issues/1248. Specifically, it
replaces `sandbox_permissions` in `config.toml` (and the
`-s`/`--sandbox-permission` CLI flags) with a "table" with effectively
three variants:
```toml
# Safest option: full disk is read-only, but writes and network access are disallowed.
[sandbox]
mode = "read-only"
# The cwd of the Codex task is writable, as well as $TMPDIR on macOS.
# writable_roots can be used to specify additional writable folders.
[sandbox]
mode = "workspace-write"
writable_roots = [] # Optional, defaults to the empty list.
network_access = false # Optional, defaults to false.
# Disable sandboxing: use at your own risk!!!
[sandbox]
mode = "danger-full-access"
```
This should make sandboxing easier to reason about. While we have
dropped support for `-s`, the way it works now is:
- no flags => `read-only`
- `--full-auto` => `workspace-write`
- currently, there is no way to specify `danger-full-access` via a CLI
flag, but we will revisit that as part of
https://github.com/openai/codex/issues/1254
Outstanding issue:
- As noted in the `TODO` on `SandboxPolicy::is_unrestricted()`, we are
still conflating sandbox preferences with approval preferences in that
case, which needs to be cleaned up.
2025-06-24 16:59:47 -07:00
|
|
|
assert_eq!(
|
feat: add support for --sandbox flag (#1476)
On a high-level, we try to design `config.toml` so that you don't have
to "comment out a lot of stuff" when testing different options.
Previously, defining a sandbox policy was somewhat at odds with this
principle because you would define the policy as attributes of
`[sandbox]` like so:
```toml
[sandbox]
mode = "workspace-write"
writable_roots = [ "/tmp" ]
```
but if you wanted to temporarily change to a read-only sandbox, you
might feel compelled to modify your file to be:
```toml
[sandbox]
mode = "read-only"
# mode = "workspace-write"
# writable_roots = [ "/tmp" ]
```
Technically, commenting out `writable_roots` would not be strictly
necessary, as `mode = "read-only"` would ignore `writable_roots`, but
it's still a reasonable thing to do to keep things tidy.
Currently, the various values for `mode` do not support that many
attributes, so this is not that hard to maintain, but one could imagine
this becoming more complex in the future.
In this PR, we change Codex CLI so that it no longer recognizes
`[sandbox]`. Instead, it introduces a top-level option, `sandbox_mode`,
and `[sandbox_workspace_write]` is used to further configure the sandbox
when when `sandbox_mode = "workspace-write"` is used:
```toml
sandbox_mode = "workspace-write"
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
```
This feels a bit more future-proof in that it is less tedious to
configure different sandboxes:
```toml
sandbox_mode = "workspace-write"
[sandbox_read_only]
# read-only options here...
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
[sandbox_danger_full_access]
# danger-full-access options here...
```
In this scheme, you never need to comment out the configuration for an
individual sandbox type: you only need to redefine `sandbox_mode`.
Relatedly, previous to this change, a user had to do `-c
sandbox.mode=read-only` to change the mode on the command line. With
this change, things are arguably a bit cleaner because the equivalent
option is `-c sandbox_mode=read-only` (and now `-c
sandbox_workspace_write=...` can be set separately).
Though more importantly, we introduce the `-s/--sandbox` option to the
CLI, which maps directly to `sandbox_mode` in `config.toml`, making
config override behavior easier to reason about. Moreover, as you can
see in the updates to the various Markdown files, it is much easier to
explain how to configure sandboxing when things like `--sandbox
read-only` can be used as an example.
Relatedly, this cleanup also made it straightforward to add support for
a `sandbox` option for Codex when used as an MCP server (see the changes
to `mcp-server/src/codex_tool_config.rs`).
Fixes https://github.com/openai/codex/issues/1248.
2025-07-07 22:31:30 -07:00
|
|
|
SandboxPolicy::WorkspaceWrite {
|
2025-08-07 00:17:00 -07:00
|
|
|
writable_roots: vec![PathBuf::from("/my/workspace")],
|
feat: add support for --sandbox flag (#1476)
On a high-level, we try to design `config.toml` so that you don't have
to "comment out a lot of stuff" when testing different options.
Previously, defining a sandbox policy was somewhat at odds with this
principle because you would define the policy as attributes of
`[sandbox]` like so:
```toml
[sandbox]
mode = "workspace-write"
writable_roots = [ "/tmp" ]
```
but if you wanted to temporarily change to a read-only sandbox, you
might feel compelled to modify your file to be:
```toml
[sandbox]
mode = "read-only"
# mode = "workspace-write"
# writable_roots = [ "/tmp" ]
```
Technically, commenting out `writable_roots` would not be strictly
necessary, as `mode = "read-only"` would ignore `writable_roots`, but
it's still a reasonable thing to do to keep things tidy.
Currently, the various values for `mode` do not support that many
attributes, so this is not that hard to maintain, but one could imagine
this becoming more complex in the future.
In this PR, we change Codex CLI so that it no longer recognizes
`[sandbox]`. Instead, it introduces a top-level option, `sandbox_mode`,
and `[sandbox_workspace_write]` is used to further configure the sandbox
when when `sandbox_mode = "workspace-write"` is used:
```toml
sandbox_mode = "workspace-write"
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
```
This feels a bit more future-proof in that it is less tedious to
configure different sandboxes:
```toml
sandbox_mode = "workspace-write"
[sandbox_read_only]
# read-only options here...
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
[sandbox_danger_full_access]
# danger-full-access options here...
```
In this scheme, you never need to comment out the configuration for an
individual sandbox type: you only need to redefine `sandbox_mode`.
Relatedly, previous to this change, a user had to do `-c
sandbox.mode=read-only` to change the mode on the command line. With
this change, things are arguably a bit cleaner because the equivalent
option is `-c sandbox_mode=read-only` (and now `-c
sandbox_workspace_write=...` can be set separately).
Though more importantly, we introduce the `-s/--sandbox` option to the
CLI, which maps directly to `sandbox_mode` in `config.toml`, making
config override behavior easier to reason about. Moreover, as you can
see in the updates to the various Markdown files, it is much easier to
explain how to configure sandboxing when things like `--sandbox
read-only` can be used as an example.
Relatedly, this cleanup also made it straightforward to add support for
a `sandbox` option for Codex when used as an MCP server (see the changes
to `mcp-server/src/codex_tool_config.rs`).
Fixes https://github.com/openai/codex/issues/1248.
2025-07-07 22:31:30 -07:00
|
|
|
network_access: false,
|
2025-08-07 00:17:00 -07:00
|
|
|
exclude_tmpdir_env_var: true,
|
|
|
|
|
exclude_slash_tmp: true,
|
feat: add support for --sandbox flag (#1476)
On a high-level, we try to design `config.toml` so that you don't have
to "comment out a lot of stuff" when testing different options.
Previously, defining a sandbox policy was somewhat at odds with this
principle because you would define the policy as attributes of
`[sandbox]` like so:
```toml
[sandbox]
mode = "workspace-write"
writable_roots = [ "/tmp" ]
```
but if you wanted to temporarily change to a read-only sandbox, you
might feel compelled to modify your file to be:
```toml
[sandbox]
mode = "read-only"
# mode = "workspace-write"
# writable_roots = [ "/tmp" ]
```
Technically, commenting out `writable_roots` would not be strictly
necessary, as `mode = "read-only"` would ignore `writable_roots`, but
it's still a reasonable thing to do to keep things tidy.
Currently, the various values for `mode` do not support that many
attributes, so this is not that hard to maintain, but one could imagine
this becoming more complex in the future.
In this PR, we change Codex CLI so that it no longer recognizes
`[sandbox]`. Instead, it introduces a top-level option, `sandbox_mode`,
and `[sandbox_workspace_write]` is used to further configure the sandbox
when when `sandbox_mode = "workspace-write"` is used:
```toml
sandbox_mode = "workspace-write"
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
```
This feels a bit more future-proof in that it is less tedious to
configure different sandboxes:
```toml
sandbox_mode = "workspace-write"
[sandbox_read_only]
# read-only options here...
[sandbox_workspace_write]
writable_roots = [ "/tmp" ]
[sandbox_danger_full_access]
# danger-full-access options here...
```
In this scheme, you never need to comment out the configuration for an
individual sandbox type: you only need to redefine `sandbox_mode`.
Relatedly, previous to this change, a user had to do `-c
sandbox.mode=read-only` to change the mode on the command line. With
this change, things are arguably a bit cleaner because the equivalent
option is `-c sandbox_mode=read-only` (and now `-c
sandbox_workspace_write=...` can be set separately).
Though more importantly, we introduce the `-s/--sandbox` option to the
CLI, which maps directly to `sandbox_mode` in `config.toml`, making
config override behavior easier to reason about. Moreover, as you can
see in the updates to the various Markdown files, it is much easier to
explain how to configure sandboxing when things like `--sandbox
read-only` can be used as an example.
Relatedly, this cleanup also made it straightforward to add support for
a `sandbox` option for Codex when used as an MCP server (see the changes
to `mcp-server/src/codex_tool_config.rs`).
Fixes https://github.com/openai/codex/issues/1248.
2025-07-07 22:31:30 -07:00
|
|
|
},
|
|
|
|
|
sandbox_workspace_write_cfg.derive_sandbox_policy(sandbox_mode_override)
|
feat: redesign sandbox config (#1373)
This is a major redesign of how sandbox configuration works and aims to
fix https://github.com/openai/codex/issues/1248. Specifically, it
replaces `sandbox_permissions` in `config.toml` (and the
`-s`/`--sandbox-permission` CLI flags) with a "table" with effectively
three variants:
```toml
# Safest option: full disk is read-only, but writes and network access are disallowed.
[sandbox]
mode = "read-only"
# The cwd of the Codex task is writable, as well as $TMPDIR on macOS.
# writable_roots can be used to specify additional writable folders.
[sandbox]
mode = "workspace-write"
writable_roots = [] # Optional, defaults to the empty list.
network_access = false # Optional, defaults to false.
# Disable sandboxing: use at your own risk!!!
[sandbox]
mode = "danger-full-access"
```
This should make sandboxing easier to reason about. While we have
dropped support for `-s`, the way it works now is:
- no flags => `read-only`
- `--full-auto` => `workspace-write`
- currently, there is no way to specify `danger-full-access` via a CLI
flag, but we will revisit that as part of
https://github.com/openai/codex/issues/1254
Outstanding issue:
- As noted in the `TODO` on `SandboxPolicy::is_unrestricted()`, we are
still conflating sandbox preferences with approval preferences in that
case, which needs to be cleaned up.
2025-06-24 16:59:47 -07:00
|
|
|
);
|
2025-04-29 18:42:52 -07:00
|
|
|
}
|
2025-09-14 21:30:56 -07:00
|
|
|
|
2025-10-03 13:02:26 -07:00
|
|
|
#[tokio::test]
|
|
|
|
|
async fn load_global_mcp_servers_returns_empty_if_missing() -> anyhow::Result<()> {
|
2025-09-14 21:30:56 -07:00
|
|
|
let codex_home = TempDir::new()?;
|
|
|
|
|
|
2025-10-03 13:02:26 -07:00
|
|
|
let servers = load_global_mcp_servers(codex_home.path()).await?;
|
2025-09-14 21:30:56 -07:00
|
|
|
assert!(servers.is_empty());
|
|
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
2025-10-03 13:02:26 -07:00
|
|
|
#[tokio::test]
|
|
|
|
|
async fn write_global_mcp_servers_round_trips_entries() -> anyhow::Result<()> {
|
2025-09-14 21:30:56 -07:00
|
|
|
let codex_home = TempDir::new()?;
|
|
|
|
|
|
|
|
|
|
let mut servers = BTreeMap::new();
|
|
|
|
|
servers.insert(
|
|
|
|
|
"docs".to_string(),
|
|
|
|
|
McpServerConfig {
|
2025-09-26 18:24:01 -07:00
|
|
|
transport: McpServerTransportConfig::Stdio {
|
|
|
|
|
command: "echo".to_string(),
|
|
|
|
|
args: vec!["hello".to_string()],
|
|
|
|
|
env: None,
|
|
|
|
|
},
|
2025-09-22 10:30:59 -07:00
|
|
|
startup_timeout_sec: Some(Duration::from_secs(3)),
|
|
|
|
|
tool_timeout_sec: Some(Duration::from_secs(5)),
|
2025-09-14 21:30:56 -07:00
|
|
|
},
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
write_global_mcp_servers(codex_home.path(), &servers)?;
|
|
|
|
|
|
2025-10-03 13:02:26 -07:00
|
|
|
let loaded = load_global_mcp_servers(codex_home.path()).await?;
|
2025-09-14 21:30:56 -07:00
|
|
|
assert_eq!(loaded.len(), 1);
|
|
|
|
|
let docs = loaded.get("docs").expect("docs entry");
|
2025-09-26 18:24:01 -07:00
|
|
|
match &docs.transport {
|
|
|
|
|
McpServerTransportConfig::Stdio { command, args, env } => {
|
|
|
|
|
assert_eq!(command, "echo");
|
|
|
|
|
assert_eq!(args, &vec!["hello".to_string()]);
|
|
|
|
|
assert!(env.is_none());
|
|
|
|
|
}
|
|
|
|
|
other => panic!("unexpected transport {other:?}"),
|
|
|
|
|
}
|
2025-09-22 10:30:59 -07:00
|
|
|
assert_eq!(docs.startup_timeout_sec, Some(Duration::from_secs(3)));
|
|
|
|
|
assert_eq!(docs.tool_timeout_sec, Some(Duration::from_secs(5)));
|
2025-09-14 21:30:56 -07:00
|
|
|
|
|
|
|
|
let empty = BTreeMap::new();
|
|
|
|
|
write_global_mcp_servers(codex_home.path(), &empty)?;
|
2025-10-03 13:02:26 -07:00
|
|
|
let loaded = load_global_mcp_servers(codex_home.path()).await?;
|
2025-09-14 21:30:56 -07:00
|
|
|
assert!(loaded.is_empty());
|
|
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
2025-05-13 16:52:52 -07:00
|
|
|
|
2025-10-03 13:02:26 -07:00
|
|
|
#[tokio::test]
|
|
|
|
|
async fn managed_config_wins_over_cli_overrides() -> anyhow::Result<()> {
|
|
|
|
|
let codex_home = TempDir::new()?;
|
|
|
|
|
let managed_path = codex_home.path().join("managed_config.toml");
|
|
|
|
|
|
|
|
|
|
std::fs::write(
|
|
|
|
|
codex_home.path().join(CONFIG_TOML_FILE),
|
|
|
|
|
"model = \"base\"\n",
|
|
|
|
|
)?;
|
|
|
|
|
std::fs::write(&managed_path, "model = \"managed_config\"\n")?;
|
|
|
|
|
|
|
|
|
|
let overrides = crate::config_loader::LoaderOverrides {
|
|
|
|
|
managed_config_path: Some(managed_path),
|
|
|
|
|
#[cfg(target_os = "macos")]
|
|
|
|
|
managed_preferences_base64: None,
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
let root_value = load_resolved_config(
|
|
|
|
|
codex_home.path(),
|
|
|
|
|
vec![("model".to_string(), TomlValue::String("cli".to_string()))],
|
|
|
|
|
overrides,
|
|
|
|
|
)
|
|
|
|
|
.await?;
|
|
|
|
|
|
|
|
|
|
let cfg: ConfigToml = root_value.try_into().map_err(|e| {
|
|
|
|
|
tracing::error!("Failed to deserialize overridden config: {e}");
|
|
|
|
|
std::io::Error::new(std::io::ErrorKind::InvalidData, e)
|
|
|
|
|
})?;
|
|
|
|
|
|
|
|
|
|
assert_eq!(cfg.model.as_deref(), Some("managed_config"));
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[tokio::test]
|
|
|
|
|
async fn load_global_mcp_servers_accepts_legacy_ms_field() -> anyhow::Result<()> {
|
2025-09-22 10:30:59 -07:00
|
|
|
let codex_home = TempDir::new()?;
|
|
|
|
|
let config_path = codex_home.path().join(CONFIG_TOML_FILE);
|
|
|
|
|
|
|
|
|
|
std::fs::write(
|
|
|
|
|
&config_path,
|
|
|
|
|
r#"
|
|
|
|
|
[mcp_servers]
|
|
|
|
|
[mcp_servers.docs]
|
|
|
|
|
command = "echo"
|
|
|
|
|
startup_timeout_ms = 2500
|
|
|
|
|
"#,
|
|
|
|
|
)?;
|
|
|
|
|
|
2025-10-03 13:02:26 -07:00
|
|
|
let servers = load_global_mcp_servers(codex_home.path()).await?;
|
2025-09-22 10:30:59 -07:00
|
|
|
let docs = servers.get("docs").expect("docs entry");
|
|
|
|
|
assert_eq!(docs.startup_timeout_sec, Some(Duration::from_millis(2500)));
|
|
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
2025-10-03 13:02:26 -07:00
|
|
|
#[tokio::test]
|
|
|
|
|
async fn write_global_mcp_servers_serializes_env_sorted() -> anyhow::Result<()> {
|
2025-09-26 18:24:01 -07:00
|
|
|
let codex_home = TempDir::new()?;
|
|
|
|
|
|
|
|
|
|
let servers = BTreeMap::from([(
|
|
|
|
|
"docs".to_string(),
|
|
|
|
|
McpServerConfig {
|
|
|
|
|
transport: McpServerTransportConfig::Stdio {
|
|
|
|
|
command: "docs-server".to_string(),
|
|
|
|
|
args: vec!["--verbose".to_string()],
|
|
|
|
|
env: Some(HashMap::from([
|
|
|
|
|
("ZIG_VAR".to_string(), "3".to_string()),
|
|
|
|
|
("ALPHA_VAR".to_string(), "1".to_string()),
|
|
|
|
|
])),
|
|
|
|
|
},
|
|
|
|
|
startup_timeout_sec: None,
|
|
|
|
|
tool_timeout_sec: None,
|
|
|
|
|
},
|
|
|
|
|
)]);
|
|
|
|
|
|
|
|
|
|
write_global_mcp_servers(codex_home.path(), &servers)?;
|
|
|
|
|
|
|
|
|
|
let config_path = codex_home.path().join(CONFIG_TOML_FILE);
|
|
|
|
|
let serialized = std::fs::read_to_string(&config_path)?;
|
|
|
|
|
assert_eq!(
|
|
|
|
|
serialized,
|
|
|
|
|
r#"[mcp_servers.docs]
|
|
|
|
|
command = "docs-server"
|
|
|
|
|
args = ["--verbose"]
|
|
|
|
|
|
|
|
|
|
[mcp_servers.docs.env]
|
|
|
|
|
ALPHA_VAR = "1"
|
|
|
|
|
ZIG_VAR = "3"
|
|
|
|
|
"#
|
|
|
|
|
);
|
|
|
|
|
|
2025-10-03 13:02:26 -07:00
|
|
|
let loaded = load_global_mcp_servers(codex_home.path()).await?;
|
2025-09-26 18:24:01 -07:00
|
|
|
let docs = loaded.get("docs").expect("docs entry");
|
|
|
|
|
match &docs.transport {
|
|
|
|
|
McpServerTransportConfig::Stdio { command, args, env } => {
|
|
|
|
|
assert_eq!(command, "docs-server");
|
|
|
|
|
assert_eq!(args, &vec!["--verbose".to_string()]);
|
|
|
|
|
let env = env
|
|
|
|
|
.as_ref()
|
|
|
|
|
.expect("env should be preserved for stdio transport");
|
|
|
|
|
assert_eq!(env.get("ALPHA_VAR"), Some(&"1".to_string()));
|
|
|
|
|
assert_eq!(env.get("ZIG_VAR"), Some(&"3".to_string()));
|
|
|
|
|
}
|
|
|
|
|
other => panic!("unexpected transport {other:?}"),
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
2025-10-03 13:02:26 -07:00
|
|
|
#[tokio::test]
|
|
|
|
|
async fn write_global_mcp_servers_serializes_streamable_http() -> anyhow::Result<()> {
|
2025-09-26 18:24:01 -07:00
|
|
|
let codex_home = TempDir::new()?;
|
|
|
|
|
|
|
|
|
|
let mut servers = BTreeMap::from([(
|
|
|
|
|
"docs".to_string(),
|
|
|
|
|
McpServerConfig {
|
|
|
|
|
transport: McpServerTransportConfig::StreamableHttp {
|
|
|
|
|
url: "https://example.com/mcp".to_string(),
|
|
|
|
|
bearer_token: Some("secret-token".to_string()),
|
|
|
|
|
},
|
|
|
|
|
startup_timeout_sec: Some(Duration::from_secs(2)),
|
|
|
|
|
tool_timeout_sec: None,
|
|
|
|
|
},
|
|
|
|
|
)]);
|
|
|
|
|
|
|
|
|
|
write_global_mcp_servers(codex_home.path(), &servers)?;
|
|
|
|
|
|
|
|
|
|
let config_path = codex_home.path().join(CONFIG_TOML_FILE);
|
|
|
|
|
let serialized = std::fs::read_to_string(&config_path)?;
|
|
|
|
|
assert_eq!(
|
|
|
|
|
serialized,
|
|
|
|
|
r#"[mcp_servers.docs]
|
|
|
|
|
url = "https://example.com/mcp"
|
|
|
|
|
bearer_token = "secret-token"
|
|
|
|
|
startup_timeout_sec = 2.0
|
|
|
|
|
"#
|
|
|
|
|
);
|
|
|
|
|
|
2025-10-03 13:02:26 -07:00
|
|
|
let loaded = load_global_mcp_servers(codex_home.path()).await?;
|
2025-09-26 18:24:01 -07:00
|
|
|
let docs = loaded.get("docs").expect("docs entry");
|
|
|
|
|
match &docs.transport {
|
|
|
|
|
McpServerTransportConfig::StreamableHttp { url, bearer_token } => {
|
|
|
|
|
assert_eq!(url, "https://example.com/mcp");
|
|
|
|
|
assert_eq!(bearer_token.as_deref(), Some("secret-token"));
|
|
|
|
|
}
|
|
|
|
|
other => panic!("unexpected transport {other:?}"),
|
|
|
|
|
}
|
|
|
|
|
assert_eq!(docs.startup_timeout_sec, Some(Duration::from_secs(2)));
|
|
|
|
|
|
|
|
|
|
servers.insert(
|
|
|
|
|
"docs".to_string(),
|
|
|
|
|
McpServerConfig {
|
|
|
|
|
transport: McpServerTransportConfig::StreamableHttp {
|
|
|
|
|
url: "https://example.com/mcp".to_string(),
|
|
|
|
|
bearer_token: None,
|
|
|
|
|
},
|
|
|
|
|
startup_timeout_sec: None,
|
|
|
|
|
tool_timeout_sec: None,
|
|
|
|
|
},
|
|
|
|
|
);
|
|
|
|
|
write_global_mcp_servers(codex_home.path(), &servers)?;
|
|
|
|
|
|
|
|
|
|
let serialized = std::fs::read_to_string(&config_path)?;
|
|
|
|
|
assert_eq!(
|
|
|
|
|
serialized,
|
|
|
|
|
r#"[mcp_servers.docs]
|
|
|
|
|
url = "https://example.com/mcp"
|
|
|
|
|
"#
|
|
|
|
|
);
|
|
|
|
|
|
2025-10-03 13:02:26 -07:00
|
|
|
let loaded = load_global_mcp_servers(codex_home.path()).await?;
|
2025-09-26 18:24:01 -07:00
|
|
|
let docs = loaded.get("docs").expect("docs entry");
|
|
|
|
|
match &docs.transport {
|
|
|
|
|
McpServerTransportConfig::StreamableHttp { url, bearer_token } => {
|
|
|
|
|
assert_eq!(url, "https://example.com/mcp");
|
|
|
|
|
assert!(bearer_token.is_none());
|
|
|
|
|
}
|
|
|
|
|
other => panic!("unexpected transport {other:?}"),
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
2025-09-11 15:04:29 -07:00
|
|
|
#[tokio::test]
|
|
|
|
|
async fn persist_model_selection_updates_defaults() -> anyhow::Result<()> {
|
|
|
|
|
let codex_home = TempDir::new()?;
|
|
|
|
|
|
|
|
|
|
persist_model_selection(
|
|
|
|
|
codex_home.path(),
|
|
|
|
|
None,
|
2025-09-15 08:17:13 -07:00
|
|
|
"gpt-5-codex",
|
2025-09-11 15:04:29 -07:00
|
|
|
Some(ReasoningEffort::High),
|
|
|
|
|
)
|
|
|
|
|
.await?;
|
|
|
|
|
|
|
|
|
|
let serialized =
|
|
|
|
|
tokio::fs::read_to_string(codex_home.path().join(CONFIG_TOML_FILE)).await?;
|
|
|
|
|
let parsed: ConfigToml = toml::from_str(&serialized)?;
|
|
|
|
|
|
2025-09-15 08:17:13 -07:00
|
|
|
assert_eq!(parsed.model.as_deref(), Some("gpt-5-codex"));
|
2025-09-11 15:04:29 -07:00
|
|
|
assert_eq!(parsed.model_reasoning_effort, Some(ReasoningEffort::High));
|
|
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[tokio::test]
|
|
|
|
|
async fn persist_model_selection_overwrites_existing_model() -> anyhow::Result<()> {
|
|
|
|
|
let codex_home = TempDir::new()?;
|
|
|
|
|
let config_path = codex_home.path().join(CONFIG_TOML_FILE);
|
|
|
|
|
|
|
|
|
|
tokio::fs::write(
|
|
|
|
|
&config_path,
|
|
|
|
|
r#"
|
2025-09-22 20:10:52 -07:00
|
|
|
model = "gpt-5-codex"
|
2025-09-11 15:04:29 -07:00
|
|
|
model_reasoning_effort = "medium"
|
|
|
|
|
|
|
|
|
|
[profiles.dev]
|
|
|
|
|
model = "gpt-4.1"
|
|
|
|
|
"#,
|
|
|
|
|
)
|
|
|
|
|
.await?;
|
|
|
|
|
|
|
|
|
|
persist_model_selection(
|
|
|
|
|
codex_home.path(),
|
|
|
|
|
None,
|
|
|
|
|
"o4-mini",
|
|
|
|
|
Some(ReasoningEffort::High),
|
|
|
|
|
)
|
|
|
|
|
.await?;
|
|
|
|
|
|
|
|
|
|
let serialized = tokio::fs::read_to_string(config_path).await?;
|
|
|
|
|
let parsed: ConfigToml = toml::from_str(&serialized)?;
|
|
|
|
|
|
|
|
|
|
assert_eq!(parsed.model.as_deref(), Some("o4-mini"));
|
|
|
|
|
assert_eq!(parsed.model_reasoning_effort, Some(ReasoningEffort::High));
|
|
|
|
|
assert_eq!(
|
|
|
|
|
parsed
|
|
|
|
|
.profiles
|
|
|
|
|
.get("dev")
|
|
|
|
|
.and_then(|profile| profile.model.as_deref()),
|
|
|
|
|
Some("gpt-4.1"),
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[tokio::test]
|
|
|
|
|
async fn persist_model_selection_updates_profile() -> anyhow::Result<()> {
|
|
|
|
|
let codex_home = TempDir::new()?;
|
|
|
|
|
|
|
|
|
|
persist_model_selection(
|
|
|
|
|
codex_home.path(),
|
|
|
|
|
Some("dev"),
|
2025-09-15 08:17:13 -07:00
|
|
|
"gpt-5-codex",
|
2025-09-12 22:44:05 -07:00
|
|
|
Some(ReasoningEffort::Medium),
|
2025-09-11 15:04:29 -07:00
|
|
|
)
|
|
|
|
|
.await?;
|
|
|
|
|
|
|
|
|
|
let serialized =
|
|
|
|
|
tokio::fs::read_to_string(codex_home.path().join(CONFIG_TOML_FILE)).await?;
|
|
|
|
|
let parsed: ConfigToml = toml::from_str(&serialized)?;
|
|
|
|
|
let profile = parsed
|
|
|
|
|
.profiles
|
|
|
|
|
.get("dev")
|
|
|
|
|
.expect("profile should be created");
|
|
|
|
|
|
2025-09-15 08:17:13 -07:00
|
|
|
assert_eq!(profile.model.as_deref(), Some("gpt-5-codex"));
|
2025-09-12 22:44:05 -07:00
|
|
|
assert_eq!(
|
|
|
|
|
profile.model_reasoning_effort,
|
|
|
|
|
Some(ReasoningEffort::Medium)
|
|
|
|
|
);
|
2025-09-11 15:04:29 -07:00
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[tokio::test]
|
|
|
|
|
async fn persist_model_selection_updates_existing_profile() -> anyhow::Result<()> {
|
|
|
|
|
let codex_home = TempDir::new()?;
|
|
|
|
|
let config_path = codex_home.path().join(CONFIG_TOML_FILE);
|
|
|
|
|
|
|
|
|
|
tokio::fs::write(
|
|
|
|
|
&config_path,
|
|
|
|
|
r#"
|
|
|
|
|
[profiles.dev]
|
|
|
|
|
model = "gpt-4"
|
|
|
|
|
model_reasoning_effort = "medium"
|
|
|
|
|
|
|
|
|
|
[profiles.prod]
|
2025-09-22 20:10:52 -07:00
|
|
|
model = "gpt-5-codex"
|
2025-09-11 15:04:29 -07:00
|
|
|
"#,
|
|
|
|
|
)
|
|
|
|
|
.await?;
|
|
|
|
|
|
|
|
|
|
persist_model_selection(
|
|
|
|
|
codex_home.path(),
|
|
|
|
|
Some("dev"),
|
|
|
|
|
"o4-high",
|
|
|
|
|
Some(ReasoningEffort::Medium),
|
|
|
|
|
)
|
|
|
|
|
.await?;
|
|
|
|
|
|
|
|
|
|
let serialized = tokio::fs::read_to_string(config_path).await?;
|
|
|
|
|
let parsed: ConfigToml = toml::from_str(&serialized)?;
|
|
|
|
|
|
|
|
|
|
let dev_profile = parsed
|
|
|
|
|
.profiles
|
|
|
|
|
.get("dev")
|
|
|
|
|
.expect("dev profile should survive updates");
|
|
|
|
|
assert_eq!(dev_profile.model.as_deref(), Some("o4-high"));
|
|
|
|
|
assert_eq!(
|
|
|
|
|
dev_profile.model_reasoning_effort,
|
|
|
|
|
Some(ReasoningEffort::Medium)
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
assert_eq!(
|
|
|
|
|
parsed
|
|
|
|
|
.profiles
|
|
|
|
|
.get("prod")
|
|
|
|
|
.and_then(|profile| profile.model.as_deref()),
|
2025-09-22 20:10:52 -07:00
|
|
|
Some("gpt-5-codex"),
|
2025-09-11 15:04:29 -07:00
|
|
|
);
|
|
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
2025-05-15 00:30:13 -07:00
|
|
|
struct PrecedenceTestFixture {
|
|
|
|
|
cwd: TempDir,
|
|
|
|
|
codex_home: TempDir,
|
|
|
|
|
cfg: ConfigToml,
|
|
|
|
|
model_provider_map: HashMap<String, ModelProviderInfo>,
|
|
|
|
|
openai_provider: ModelProviderInfo,
|
|
|
|
|
openai_chat_completions_provider: ModelProviderInfo,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl PrecedenceTestFixture {
|
|
|
|
|
fn cwd(&self) -> PathBuf {
|
|
|
|
|
self.cwd.path().to_path_buf()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn codex_home(&self) -> PathBuf {
|
|
|
|
|
self.codex_home.path().to_path_buf()
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn create_test_fixture() -> std::io::Result<PrecedenceTestFixture> {
|
2025-05-13 16:52:52 -07:00
|
|
|
let toml = r#"
|
|
|
|
|
model = "o3"
|
2025-06-24 22:19:21 -07:00
|
|
|
approval_policy = "untrusted"
|
2025-05-13 16:52:52 -07:00
|
|
|
|
|
|
|
|
# Can be used to determine which profile to use if not specified by
|
|
|
|
|
# `ConfigOverrides`.
|
|
|
|
|
profile = "gpt3"
|
|
|
|
|
|
|
|
|
|
[model_providers.openai-chat-completions]
|
|
|
|
|
name = "OpenAI using Chat Completions"
|
|
|
|
|
base_url = "https://api.openai.com/v1"
|
|
|
|
|
env_key = "OPENAI_API_KEY"
|
|
|
|
|
wire_api = "chat"
|
2025-07-18 12:12:39 -07:00
|
|
|
request_max_retries = 4 # retry failed HTTP requests
|
|
|
|
|
stream_max_retries = 10 # retry dropped SSE streams
|
|
|
|
|
stream_idle_timeout_ms = 300000 # 5m idle timeout
|
2025-05-13 16:52:52 -07:00
|
|
|
|
|
|
|
|
[profiles.o3]
|
|
|
|
|
model = "o3"
|
|
|
|
|
model_provider = "openai"
|
|
|
|
|
approval_policy = "never"
|
2025-07-08 22:05:22 +03:00
|
|
|
model_reasoning_effort = "high"
|
|
|
|
|
model_reasoning_summary = "detailed"
|
2025-05-13 16:52:52 -07:00
|
|
|
|
|
|
|
|
[profiles.gpt3]
|
|
|
|
|
model = "gpt-3.5-turbo"
|
|
|
|
|
model_provider = "openai-chat-completions"
|
|
|
|
|
|
|
|
|
|
[profiles.zdr]
|
|
|
|
|
model = "o3"
|
|
|
|
|
model_provider = "openai"
|
|
|
|
|
approval_policy = "on-failure"
|
2025-09-03 12:20:31 -07:00
|
|
|
|
|
|
|
|
[profiles.gpt5]
|
|
|
|
|
model = "gpt-5"
|
|
|
|
|
model_provider = "openai"
|
|
|
|
|
approval_policy = "on-failure"
|
|
|
|
|
model_reasoning_effort = "high"
|
|
|
|
|
model_reasoning_summary = "detailed"
|
|
|
|
|
model_verbosity = "high"
|
2025-05-13 16:52:52 -07:00
|
|
|
"#;
|
|
|
|
|
|
|
|
|
|
let cfg: ConfigToml = toml::from_str(toml).expect("TOML deserialization should succeed");
|
|
|
|
|
|
|
|
|
|
// Use a temporary directory for the cwd so it does not contain an
|
|
|
|
|
// AGENTS.md file.
|
|
|
|
|
let cwd_temp_dir = TempDir::new().unwrap();
|
|
|
|
|
let cwd = cwd_temp_dir.path().to_path_buf();
|
|
|
|
|
// Make it look like a Git repo so it does not search for AGENTS.md in
|
|
|
|
|
// a parent folder, either.
|
|
|
|
|
std::fs::write(cwd.join(".git"), "gitdir: nowhere")?;
|
|
|
|
|
|
2025-05-15 00:30:13 -07:00
|
|
|
let codex_home_temp_dir = TempDir::new().unwrap();
|
|
|
|
|
|
2025-05-13 16:52:52 -07:00
|
|
|
let openai_chat_completions_provider = ModelProviderInfo {
|
|
|
|
|
name: "OpenAI using Chat Completions".to_string(),
|
2025-07-30 12:40:15 -07:00
|
|
|
base_url: Some("https://api.openai.com/v1".to_string()),
|
2025-05-13 16:52:52 -07:00
|
|
|
env_key: Some("OPENAI_API_KEY".to_string()),
|
|
|
|
|
wire_api: crate::WireApi::Chat,
|
|
|
|
|
env_key_instructions: None,
|
2025-06-30 11:39:54 -07:00
|
|
|
query_params: None,
|
2025-07-07 13:09:16 -07:00
|
|
|
http_headers: None,
|
|
|
|
|
env_http_headers: None,
|
2025-07-18 12:12:39 -07:00
|
|
|
request_max_retries: Some(4),
|
|
|
|
|
stream_max_retries: Some(10),
|
|
|
|
|
stream_idle_timeout_ms: Some(300_000),
|
2025-08-06 13:02:00 -07:00
|
|
|
requires_openai_auth: false,
|
2025-05-13 16:52:52 -07:00
|
|
|
};
|
|
|
|
|
let model_provider_map = {
|
|
|
|
|
let mut model_provider_map = built_in_model_providers();
|
|
|
|
|
model_provider_map.insert(
|
|
|
|
|
"openai-chat-completions".to_string(),
|
|
|
|
|
openai_chat_completions_provider.clone(),
|
|
|
|
|
);
|
|
|
|
|
model_provider_map
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
let openai_provider = model_provider_map
|
|
|
|
|
.get("openai")
|
|
|
|
|
.expect("openai provider should exist")
|
|
|
|
|
.clone();
|
|
|
|
|
|
2025-05-15 00:30:13 -07:00
|
|
|
Ok(PrecedenceTestFixture {
|
|
|
|
|
cwd: cwd_temp_dir,
|
|
|
|
|
codex_home: codex_home_temp_dir,
|
|
|
|
|
cfg,
|
|
|
|
|
model_provider_map,
|
|
|
|
|
openai_provider,
|
|
|
|
|
openai_chat_completions_provider,
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Users can specify config values at multiple levels that have the
|
|
|
|
|
/// following precedence:
|
|
|
|
|
///
|
|
|
|
|
/// 1. custom command-line argument, e.g. `--model o3`
|
|
|
|
|
/// 2. as part of a profile, where the `--profile` is specified via a CLI
|
2025-07-29 11:22:02 -07:00
|
|
|
/// (or in the config file itself)
|
2025-05-15 00:30:13 -07:00
|
|
|
/// 3. as an entry in `config.toml`, e.g. `model = "o3"`
|
|
|
|
|
/// 4. the default value for a required field defined in code, e.g.,
|
|
|
|
|
/// `crate::flags::OPENAI_DEFAULT_MODEL`
|
|
|
|
|
///
|
|
|
|
|
/// Note that profiles are the recommended way to specify a group of
|
|
|
|
|
/// configuration options together.
|
|
|
|
|
#[test]
fn test_precedence_fixture_with_o3_profile() -> std::io::Result<()> {
    // Shared fixture: a ConfigToml parsed from an in-test TOML document that
    // declares several profiles (o3, gpt3, zdr, gpt5), plus temp dirs for cwd
    // and CODEX_HOME and a provider map extended with a Chat Completions entry.
    let fixture = create_test_fixture()?;

    // Select the "o3" profile via overrides, as if `--profile o3` were passed
    // on the command line. `cwd` is pinned to the fixture's temp dir so project
    // discovery (AGENTS.md lookup) stays hermetic.
    let o3_profile_overrides = ConfigOverrides {
        config_profile: Some("o3".to_string()),
        cwd: Some(fixture.cwd()),
        ..Default::default()
    };
    let o3_profile_config: Config = Config::load_from_base_config_with_overrides(
        fixture.cfg.clone(),
        o3_profile_overrides,
        fixture.codex_home(),
    )?;
    // Compare the fully-resolved Config against an exhaustive expected value.
    // Every field is spelled out deliberately: adding a field to Config forces
    // this literal (and therefore this test) to be updated, keeping precedence
    // behavior pinned. Expected values here come from the o3 profile in the
    // fixture TOML; fields the profile does not set fall back to code defaults.
    assert_eq!(
        Config {
            // From the o3 profile: model, provider, approval policy, and
            // reasoning settings.
            model: "o3".to_string(),
            review_model: OPENAI_DEFAULT_REVIEW_MODEL.to_string(),
            model_family: find_family_for_model("o3").expect("known model slug"),
            // Derived from built-in model metadata for "o3"
            // (openai_model_info), not from the config file.
            model_context_window: Some(200_000),
            model_max_output_tokens: Some(100_000),
            model_auto_compact_token_limit: None,
            model_provider_id: "openai".to_string(),
            model_provider: fixture.openai_provider.clone(),
            approval_policy: AskForApproval::Never,
            sandbox_policy: SandboxPolicy::new_read_only_policy(),
            shell_environment_policy: ShellEnvironmentPolicy::default(),
            user_instructions: None,
            notify: None,
            cwd: fixture.cwd(),
            mcp_servers: HashMap::new(),
            model_providers: fixture.model_provider_map.clone(),
            project_doc_max_bytes: PROJECT_DOC_MAX_BYTES,
            project_doc_fallback_filenames: Vec::new(),
            codex_home: fixture.codex_home(),
            history: History::default(),
            file_opener: UriBasedFileOpener::VsCode,
            codex_linux_sandbox_exe: None,
            hide_agent_reasoning: false,
            show_raw_agent_reasoning: false,
            // Reasoning effort/summary are set explicitly by the o3 profile in
            // the fixture TOML.
            model_reasoning_effort: Some(ReasoningEffort::High),
            model_reasoning_summary: ReasoningSummary::Detailed,
            model_verbosity: None,
            chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
            base_instructions: None,
            include_plan_tool: false,
            include_apply_patch_tool: false,
            tools_web_search_request: false,
            use_experimental_streamable_shell_tool: false,
            use_experimental_unified_exec_tool: false,
            use_experimental_use_rmcp_client: false,
            include_view_image_tool: true,
            // The profile selected via overrides is recorded on the resolved
            // config.
            active_profile: Some("o3".to_string()),
            windows_wsl_setup_acknowledged: false,
            disable_paste_burst: false,
            tui_notifications: Default::default(),
            otel: OtelConfig::default(),
        },
        o3_profile_config
    );
    Ok(())
}
|
|
|
|
|
|
|
|
|
|
#[test]
fn test_precedence_fixture_with_gpt3_profile() -> std::io::Result<()> {
    // Shared fixture (see create_test_fixture): parsed ConfigToml with several
    // profiles, hermetic temp dirs, and a provider map that includes the
    // "openai-chat-completions" provider used by the gpt3 profile.
    let fixture = create_test_fixture()?;

    // Explicitly select the "gpt3" profile via overrides, as if the user
    // passed `--profile gpt3`.
    let gpt3_profile_overrides = ConfigOverrides {
        config_profile: Some("gpt3".to_string()),
        cwd: Some(fixture.cwd()),
        ..Default::default()
    };
    let gpt3_profile_config = Config::load_from_base_config_with_overrides(
        fixture.cfg.clone(),
        gpt3_profile_overrides,
        fixture.codex_home(),
    )?;
    // Exhaustive expected Config for the gpt3 profile. Spelling out every
    // field keeps this test in lockstep with the Config struct: a new field
    // is a compile error here until its expected value is decided.
    let expected_gpt3_profile_config = Config {
        // From the gpt3 profile: model + the Chat Completions provider.
        model: "gpt-3.5-turbo".to_string(),
        review_model: OPENAI_DEFAULT_REVIEW_MODEL.to_string(),
        model_family: find_family_for_model("gpt-3.5-turbo").expect("known model slug"),
        // Derived from built-in model metadata for "gpt-3.5-turbo", not from
        // the config file.
        model_context_window: Some(16_385),
        model_max_output_tokens: Some(4_096),
        model_auto_compact_token_limit: None,
        model_provider_id: "openai-chat-completions".to_string(),
        model_provider: fixture.openai_chat_completions_provider.clone(),
        // The gpt3 profile does not set an approval policy, so the code
        // default applies.
        approval_policy: AskForApproval::UnlessTrusted,
        sandbox_policy: SandboxPolicy::new_read_only_policy(),
        shell_environment_policy: ShellEnvironmentPolicy::default(),
        user_instructions: None,
        notify: None,
        cwd: fixture.cwd(),
        mcp_servers: HashMap::new(),
        model_providers: fixture.model_provider_map.clone(),
        project_doc_max_bytes: PROJECT_DOC_MAX_BYTES,
        project_doc_fallback_filenames: Vec::new(),
        codex_home: fixture.codex_home(),
        history: History::default(),
        file_opener: UriBasedFileOpener::VsCode,
        codex_linux_sandbox_exe: None,
        hide_agent_reasoning: false,
        show_raw_agent_reasoning: false,
        // Unlike the o3 profile, gpt3 sets no reasoning options; defaults
        // apply.
        model_reasoning_effort: None,
        model_reasoning_summary: ReasoningSummary::default(),
        model_verbosity: None,
        chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
        base_instructions: None,
        include_plan_tool: false,
        include_apply_patch_tool: false,
        tools_web_search_request: false,
        use_experimental_streamable_shell_tool: false,
        use_experimental_unified_exec_tool: false,
        use_experimental_use_rmcp_client: false,
        include_view_image_tool: true,
        active_profile: Some("gpt3".to_string()),
        windows_wsl_setup_acknowledged: false,
        disable_paste_burst: false,
        tui_notifications: Default::default(),
        otel: OtelConfig::default(),
    };

    assert_eq!(expected_gpt3_profile_config, gpt3_profile_config);

    // Verify that loading without specifying a profile in ConfigOverrides
    // uses the default profile from the config file (which is "gpt3").
    let default_profile_overrides = ConfigOverrides {
        cwd: Some(fixture.cwd()),
        ..Default::default()
    };

    let default_profile_config = Config::load_from_base_config_with_overrides(
        fixture.cfg.clone(),
        default_profile_overrides,
        fixture.codex_home(),
    )?;

    // The implicitly-selected default profile must resolve to the exact same
    // Config as the explicit `--profile gpt3` path above.
    assert_eq!(expected_gpt3_profile_config, default_profile_config);
    Ok(())
}
|
|
|
|
|
|
|
|
|
|
    /// Loading with the `zdr` profile selected via `ConfigOverrides` should
    /// pick up that profile's model ("o3") and derive the model metadata from
    /// it, while every other field falls back to the fixture's base config or
    /// the built-in defaults.
    #[test]
    fn test_precedence_fixture_with_zdr_profile() -> std::io::Result<()> {
        // Shared fixture: a config.toml with several named profiles and a
        // custom `openai` model provider.
        let fixture = create_test_fixture()?;

        // Select the `zdr` profile explicitly; also pin cwd so the loaded
        // config is deterministic.
        let zdr_profile_overrides = ConfigOverrides {
            config_profile: Some("zdr".to_string()),
            cwd: Some(fixture.cwd()),
            ..Default::default()
        };
        let zdr_profile_config = Config::load_from_base_config_with_overrides(
            fixture.cfg.clone(),
            zdr_profile_overrides,
            fixture.codex_home(),
        )?;
        // Full expected Config, compared with a single assert_eq! below so a
        // newly added field cannot silently escape this precedence test.
        let expected_zdr_profile_config = Config {
            // From the `zdr` profile itself.
            model: "o3".to_string(),
            review_model: OPENAI_DEFAULT_REVIEW_MODEL.to_string(),
            // Metadata derived from the "o3" model slug.
            model_family: find_family_for_model("o3").expect("known model slug"),
            model_context_window: Some(200_000),
            model_max_output_tokens: Some(100_000),
            model_auto_compact_token_limit: None,
            model_provider_id: "openai".to_string(),
            model_provider: fixture.openai_provider.clone(),
            approval_policy: AskForApproval::OnFailure,
            sandbox_policy: SandboxPolicy::new_read_only_policy(),
            shell_environment_policy: ShellEnvironmentPolicy::default(),
            user_instructions: None,
            notify: None,
            cwd: fixture.cwd(),
            mcp_servers: HashMap::new(),
            model_providers: fixture.model_provider_map.clone(),
            project_doc_max_bytes: PROJECT_DOC_MAX_BYTES,
            project_doc_fallback_filenames: Vec::new(),
            codex_home: fixture.codex_home(),
            history: History::default(),
            file_opener: UriBasedFileOpener::VsCode,
            codex_linux_sandbox_exe: None,
            hide_agent_reasoning: false,
            show_raw_agent_reasoning: false,
            // The zdr profile does not set reasoning/verbosity options, so
            // these stay at their defaults (unlike the gpt5 profile test).
            model_reasoning_effort: None,
            model_reasoning_summary: ReasoningSummary::default(),
            model_verbosity: None,
            chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
            base_instructions: None,
            include_plan_tool: false,
            include_apply_patch_tool: false,
            tools_web_search_request: false,
            use_experimental_streamable_shell_tool: false,
            use_experimental_unified_exec_tool: false,
            use_experimental_use_rmcp_client: false,
            include_view_image_tool: true,
            // The profile requested via overrides is recorded as active.
            active_profile: Some("zdr".to_string()),
            windows_wsl_setup_acknowledged: false,
            disable_paste_burst: false,
            tui_notifications: Default::default(),
            otel: OtelConfig::default(),
        };

        assert_eq!(expected_zdr_profile_config, zdr_profile_config);

        Ok(())
    }
|
2025-08-21 13:20:36 -07:00
|
|
|
|
2025-09-03 12:20:31 -07:00
|
|
|
    /// Loading with the `gpt5` profile selected via `ConfigOverrides` should
    /// apply that profile's model ("gpt-5") plus the reasoning/verbosity
    /// options it defines, with all remaining fields coming from the
    /// fixture's base config or built-in defaults.
    #[test]
    fn test_precedence_fixture_with_gpt5_profile() -> std::io::Result<()> {
        // Shared fixture: a config.toml with several named profiles and a
        // custom `openai` model provider.
        let fixture = create_test_fixture()?;

        // Select the `gpt5` profile explicitly; pin cwd for determinism.
        let gpt5_profile_overrides = ConfigOverrides {
            config_profile: Some("gpt5".to_string()),
            cwd: Some(fixture.cwd()),
            ..Default::default()
        };
        let gpt5_profile_config = Config::load_from_base_config_with_overrides(
            fixture.cfg.clone(),
            gpt5_profile_overrides,
            fixture.codex_home(),
        )?;
        // Full expected Config, compared with a single assert_eq! below so a
        // newly added field cannot silently escape this precedence test.
        let expected_gpt5_profile_config = Config {
            // From the `gpt5` profile itself.
            model: "gpt-5".to_string(),
            review_model: OPENAI_DEFAULT_REVIEW_MODEL.to_string(),
            // Metadata derived from the "gpt-5" model slug.
            model_family: find_family_for_model("gpt-5").expect("known model slug"),
            model_context_window: Some(272_000),
            model_max_output_tokens: Some(128_000),
            model_auto_compact_token_limit: None,
            model_provider_id: "openai".to_string(),
            model_provider: fixture.openai_provider.clone(),
            approval_policy: AskForApproval::OnFailure,
            sandbox_policy: SandboxPolicy::new_read_only_policy(),
            shell_environment_policy: ShellEnvironmentPolicy::default(),
            user_instructions: None,
            notify: None,
            cwd: fixture.cwd(),
            mcp_servers: HashMap::new(),
            model_providers: fixture.model_provider_map.clone(),
            project_doc_max_bytes: PROJECT_DOC_MAX_BYTES,
            project_doc_fallback_filenames: Vec::new(),
            codex_home: fixture.codex_home(),
            history: History::default(),
            file_opener: UriBasedFileOpener::VsCode,
            codex_linux_sandbox_exe: None,
            hide_agent_reasoning: false,
            show_raw_agent_reasoning: false,
            // Unlike the zdr profile, gpt5 sets explicit reasoning and
            // verbosity options (values defined by the fixture's profile).
            model_reasoning_effort: Some(ReasoningEffort::High),
            model_reasoning_summary: ReasoningSummary::Detailed,
            model_verbosity: Some(Verbosity::High),
            chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
            base_instructions: None,
            include_plan_tool: false,
            include_apply_patch_tool: false,
            tools_web_search_request: false,
            use_experimental_streamable_shell_tool: false,
            use_experimental_unified_exec_tool: false,
            use_experimental_use_rmcp_client: false,
            include_view_image_tool: true,
            // The profile requested via overrides is recorded as active.
            active_profile: Some("gpt5".to_string()),
            windows_wsl_setup_acknowledged: false,
            disable_paste_burst: false,
            tui_notifications: Default::default(),
            otel: OtelConfig::default(),
        };

        assert_eq!(expected_gpt5_profile_config, gpt5_profile_config);

        Ok(())
    }
|
|
|
|
|
|
2025-08-21 13:20:36 -07:00
|
|
|
#[test]
|
|
|
|
|
fn test_set_project_trusted_writes_explicit_tables() -> anyhow::Result<()> {
|
2025-09-10 16:01:31 -07:00
|
|
|
let project_dir = Path::new("/some/path");
|
|
|
|
|
let mut doc = DocumentMut::new();
|
2025-08-21 13:20:36 -07:00
|
|
|
|
2025-09-10 16:01:31 -07:00
|
|
|
set_project_trusted_inner(&mut doc, project_dir)?;
|
2025-08-21 13:20:36 -07:00
|
|
|
|
2025-09-10 16:01:31 -07:00
|
|
|
let contents = doc.to_string();
|
2025-08-21 13:20:36 -07:00
|
|
|
|
2025-09-10 16:01:31 -07:00
|
|
|
let raw_path = project_dir.to_string_lossy();
|
2025-08-22 14:04:21 -07:00
|
|
|
let path_str = if raw_path.contains('\\') {
|
2025-08-28 11:25:23 -07:00
|
|
|
format!("'{raw_path}'")
|
2025-08-22 14:04:21 -07:00
|
|
|
} else {
|
2025-08-28 11:25:23 -07:00
|
|
|
format!("\"{raw_path}\"")
|
2025-08-22 14:04:21 -07:00
|
|
|
};
|
|
|
|
|
let expected = format!(
|
|
|
|
|
r#"[projects.{path_str}]
|
|
|
|
|
trust_level = "trusted"
|
|
|
|
|
"#
|
2025-08-21 13:20:36 -07:00
|
|
|
);
|
2025-08-22 14:04:21 -07:00
|
|
|
assert_eq!(contents, expected);
|
2025-08-21 13:20:36 -07:00
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
|
fn test_set_project_trusted_converts_inline_to_explicit() -> anyhow::Result<()> {
|
2025-09-10 16:01:31 -07:00
|
|
|
let project_dir = Path::new("/some/path");
|
2025-08-21 13:20:36 -07:00
|
|
|
|
|
|
|
|
// Seed config.toml with an inline project entry under [projects]
|
2025-09-10 16:01:31 -07:00
|
|
|
let raw_path = project_dir.to_string_lossy();
|
2025-08-22 14:04:21 -07:00
|
|
|
let path_str = if raw_path.contains('\\') {
|
2025-08-28 11:25:23 -07:00
|
|
|
format!("'{raw_path}'")
|
2025-08-22 14:04:21 -07:00
|
|
|
} else {
|
2025-08-28 11:25:23 -07:00
|
|
|
format!("\"{raw_path}\"")
|
2025-08-22 14:04:21 -07:00
|
|
|
};
|
|
|
|
|
// Use a quoted key so backslashes don't require escaping on Windows
|
2025-08-21 13:20:36 -07:00
|
|
|
let initial = format!(
|
2025-08-22 14:04:21 -07:00
|
|
|
r#"[projects]
|
|
|
|
|
{path_str} = {{ trust_level = "untrusted" }}
|
|
|
|
|
"#
|
2025-08-21 13:20:36 -07:00
|
|
|
);
|
2025-09-10 16:01:31 -07:00
|
|
|
let mut doc = initial.parse::<DocumentMut>()?;
|
2025-08-21 13:20:36 -07:00
|
|
|
|
|
|
|
|
// Run the function; it should convert to explicit tables and set trusted
|
2025-09-10 16:01:31 -07:00
|
|
|
set_project_trusted_inner(&mut doc, project_dir)?;
|
2025-08-21 13:20:36 -07:00
|
|
|
|
2025-09-10 16:01:31 -07:00
|
|
|
let contents = doc.to_string();
|
2025-08-21 13:20:36 -07:00
|
|
|
|
2025-08-22 14:04:21 -07:00
|
|
|
// Assert exact output after conversion to explicit table
|
|
|
|
|
let expected = format!(
|
|
|
|
|
r#"[projects]
|
2025-08-21 13:20:36 -07:00
|
|
|
|
2025-08-22 14:04:21 -07:00
|
|
|
[projects.{path_str}]
|
|
|
|
|
trust_level = "trusted"
|
|
|
|
|
"#
|
2025-08-21 13:20:36 -07:00
|
|
|
);
|
2025-08-22 14:04:21 -07:00
|
|
|
assert_eq!(contents, expected);
|
2025-08-21 13:20:36 -07:00
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
2025-09-10 16:01:31 -07:00
|
|
|
|
|
|
|
|
    /// Migrating a top-level inline `projects = { ... }` value should rewrite
    /// every existing entry as an explicit `[projects.<path>]` table
    /// (preserving extra keys such as `foo`) and append the newly trusted
    /// directory, without disturbing unrelated top-level keys.
    #[test]
    fn test_set_project_trusted_migrates_top_level_inline_projects_preserving_entries()
    -> anyhow::Result<()> {
        // An inline `projects` table sandwiched between other top-level keys.
        let initial = r#"toplevel = "baz"
projects = { "/Users/mbolin/code/codex4" = { trust_level = "trusted", foo = "bar" } , "/Users/mbolin/code/codex3" = { trust_level = "trusted" } }
model = "foo""#;
        let mut doc = initial.parse::<DocumentMut>()?;

        // Approve a new directory
        let new_project = Path::new("/Users/mbolin/code/codex2");
        set_project_trusted_inner(&mut doc, new_project)?;

        let contents = doc.to_string();

        // Since we created the [projects] table as part of migration, it is kept implicit.
        // Expect explicit per-project tables, preserving prior entries and appending the new one.
        let expected = r#"toplevel = "baz"
model = "foo"

[projects."/Users/mbolin/code/codex4"]
trust_level = "trusted"
foo = "bar"

[projects."/Users/mbolin/code/codex3"]
trust_level = "trusted"

[projects."/Users/mbolin/code/codex2"]
trust_level = "trusted"
"#;
        assert_eq!(contents, expected);

        Ok(())
    }
|
2025-04-29 18:42:52 -07:00
|
|
|
}
|
2025-09-15 10:22:02 -07:00
|
|
|
|
|
|
|
|
#[cfg(test)]
mod notifications_tests {
    use crate::config_types::Notifications;
    use serde::Deserialize;

    /// Mirrors the `[tui]` table just enough to exercise how the
    /// `notifications` field deserializes.
    #[derive(Deserialize, Debug, PartialEq)]
    struct TuiTomlTest {
        notifications: Notifications,
    }

    /// Root of the minimal test document: only the `tui` table.
    #[derive(Deserialize, Debug, PartialEq)]
    struct RootTomlTest {
        tui: TuiTomlTest,
    }

    /// A bare boolean `notifications = true` maps to
    /// `Notifications::Enabled(true)`.
    #[test]
    fn test_tui_notifications_true() {
        let raw = r#"
[tui]
notifications = true
"#;
        let parsed: RootTomlTest = toml::from_str(raw).expect("deserialize notifications=true");
        assert!(matches!(
            parsed.tui.notifications,
            Notifications::Enabled(true)
        ));
    }

    /// An array of strings maps to `Notifications::Custom` carrying the
    /// listed notification names.
    #[test]
    fn test_tui_notifications_custom_array() {
        let raw = r#"
[tui]
notifications = ["foo"]
"#;
        let parsed: RootTomlTest =
            toml::from_str(raw).expect("deserialize notifications=[\"foo\"]");
        assert!(matches!(
            parsed.tui.notifications,
            Notifications::Custom(ref v) if v == &vec!["foo".to_string()]
        ));
    }
}
|