feat: introduce support for shell_environment_policy in config.toml (#1061)
To date, when handling `shell` and `local_shell` tool calls, we were
spawning new processes using the environment inherited from the Codex
process itself. This means that the sensitive `OPENAI_API_KEY` that
Codex needs to talk to OpenAI models was made available to everything
run by `shell` and `local_shell`. While there are cases where that might
be useful, it does not seem like a good default.
This PR introduces a complex `shell_environment_policy` config option to
control the `env` used with these tool calls. It is inevitably a bit
complex so that it is possible to override individual components of the
policy so without having to restate the entire thing.
Details are in the updated `README.md` in this PR, but here is the
relevant bit that explains the individual fields of
`shell_environment_policy`:
| Field | Type | Default | Description |
| ------------------------- | -------------------------- | ------- |
-----------------------------------------------------------------------------------------------------------------------------------------------
|
| `inherit` | string | `core` | Starting template for the
environment:<br>`core` (`HOME`, `PATH`, `USER`, …), `all` (clone full
parent env), or `none` (start empty). |
| `ignore_default_excludes` | boolean | `false` | When `false`, Codex
removes any var whose **name** contains `KEY`, `SECRET`, or `TOKEN`
(case-insensitive) before other rules run. |
| `exclude` | array<string> | `[]` | Case-insensitive glob
patterns to drop after the default filter.<br>Examples: `"AWS_*"`,
`"AZURE_*"`. |
| `set` | table<string,string> | `{}` | Explicit key/value
overrides or additions – always win over inherited values. |
| `include_only` | array<string> | `[]` | If non-empty, a
whitelist of patterns; only variables that match _one_ pattern survive
the final step. (Generally used with `inherit = "all"`.) |
In particular, note that the default is `inherit = "core"`, so:
* if you have extra env variables that you want to inherit from the
parent process, use `inherit = "all"` and then specify `include_only`
* if you have extra env variables where you want to hardcode the values,
the default `inherit = "core"` will work fine, but then you need to
specify `set`
This configuration is not battle-tested, so we will probably still have
to play with it a bit. `core/src/exec_env.rs` has the critical business
logic as well as unit tests.
Though if nothing else, previous to this change:
```
$ cargo run --bin codex -- debug seatbelt -- printenv OPENAI_API_KEY
# ...prints OPENAI_API_KEY...
```
But after this change it does not print anything (as desired).
One final thing to call out about this PR is that the
`configure_command!` macro we use in `core/src/exec.rs` has to do some
complex logic with respect to how it builds up the `env` for the process
being spawned under Landlock/seccomp. Specifically, doing
`cmd.env_clear()` followed by `cmd.envs(&$env_map)` (which is arguably
the most intuitive way to do it) caused the Landlock unit tests to fail
because the processes spawned by the unit tests started failing in
unexpected ways! If we forgo `env_clear()` in favor of updating env vars
one at a time, the tests still pass. The comment in the code talks about
this a bit, and while I would like to investigate this more, I need to
move on for the moment, but I do plan to come back to it to fully
understand what is going on. For example, this suggests that we might
not be able to spawn a C program that calls `env_clear()`, which would
be...weird. We may still have to fiddle with our Landlock config if that
is the case.
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
#[cfg(unix)]
|
||||
use std::os::unix::process::ExitStatusExt;
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::io;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
@@ -59,6 +60,7 @@ pub struct ExecParams {
|
||||
pub command: Vec<String>,
|
||||
pub cwd: PathBuf,
|
||||
pub timeout_ms: Option<u64>,
|
||||
pub env: HashMap<String, String>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq)]
|
||||
@@ -87,12 +89,14 @@ pub async fn process_exec_tool_call(
|
||||
command,
|
||||
cwd,
|
||||
timeout_ms,
|
||||
env,
|
||||
} = params;
|
||||
let child = spawn_command_under_seatbelt(
|
||||
command,
|
||||
sandbox_policy,
|
||||
cwd,
|
||||
StdioPolicy::RedirectForShellTool,
|
||||
env,
|
||||
)
|
||||
.await?;
|
||||
consume_truncated_output(child, ctrl_c, timeout_ms).await
|
||||
@@ -145,9 +149,10 @@ pub async fn spawn_command_under_seatbelt(
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
cwd: PathBuf,
|
||||
stdio_policy: StdioPolicy,
|
||||
env: HashMap<String, String>,
|
||||
) -> std::io::Result<Child> {
|
||||
let seatbelt_command = create_seatbelt_command(command, sandbox_policy, &cwd);
|
||||
spawn_child_async(seatbelt_command, cwd, sandbox_policy, stdio_policy).await
|
||||
spawn_child_async(seatbelt_command, cwd, sandbox_policy, stdio_policy, env).await
|
||||
}
|
||||
|
||||
fn create_seatbelt_command(
|
||||
@@ -233,6 +238,7 @@ async fn exec(
|
||||
command,
|
||||
cwd,
|
||||
timeout_ms,
|
||||
env,
|
||||
}: ExecParams,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
ctrl_c: Arc<Notify>,
|
||||
@@ -242,6 +248,7 @@ async fn exec(
|
||||
cwd,
|
||||
sandbox_policy,
|
||||
StdioPolicy::RedirectForShellTool,
|
||||
env,
|
||||
)
|
||||
.await?;
|
||||
consume_truncated_output(child, ctrl_c, timeout_ms).await
|
||||
@@ -259,7 +266,8 @@ macro_rules! configure_command {
|
||||
$command: expr,
|
||||
$cwd: expr,
|
||||
$sandbox_policy: expr,
|
||||
$stdio_policy: expr
|
||||
$stdio_policy: expr,
|
||||
$env_map: expr
|
||||
) => {{
|
||||
// For now, we take `SandboxPolicy` as a parameter to spawn_child() because
|
||||
// we need to determine whether to set the
|
||||
@@ -279,6 +287,38 @@ macro_rules! configure_command {
|
||||
cmd.args(&$command[1..]);
|
||||
cmd.current_dir($cwd);
|
||||
|
||||
// Previously, to update the env for `cmd`, we did the straightforward
|
||||
// thing of calling `env_clear()` followed by `envs(&env_map)` so
|
||||
// that the spawned process inherited *only* the variables explicitly
|
||||
// provided by the caller. On Linux, the combination of `env_clear()`
|
||||
// and Landlock/seccomp caused a permission error whereas this more
|
||||
// "surgical" approach of setting variables individually appears to
|
||||
// work fine. More time with `strace` and friends is merited to fully
|
||||
// debug thus, though we will soon use a helper binary like we do for
|
||||
// Seatbelt, which will simplify this logic.
|
||||
|
||||
// Iterate through the current process environment first so we can
|
||||
// decide, for every variable that already exists, whether we need to
|
||||
// override its value.
|
||||
let mut remaining_overrides = $env_map.clone();
|
||||
for (key, current_val) in std::env::vars() {
|
||||
if let Some(desired_val) = remaining_overrides.remove(&key) {
|
||||
// The caller provided a value for this variable. Override it
|
||||
// only if the value differs from what is currently set.
|
||||
if desired_val != current_val {
|
||||
cmd.env(&key, desired_val);
|
||||
}
|
||||
}
|
||||
// If the variable was not in `env_map`, we leave it unchanged.
|
||||
}
|
||||
|
||||
// Any entries still left in `remaining_overrides` were not present in
|
||||
// the parent environment. Add them now so that the child process sees
|
||||
// the complete set requested by the caller.
|
||||
for (key, val) in remaining_overrides {
|
||||
cmd.env(key, val);
|
||||
}
|
||||
|
||||
if !$sandbox_policy.has_full_network_access() {
|
||||
cmd.env(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR, "1");
|
||||
}
|
||||
@@ -313,8 +353,9 @@ pub(crate) async fn spawn_child_async(
|
||||
cwd: PathBuf,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
stdio_policy: StdioPolicy,
|
||||
env: HashMap<String, String>,
|
||||
) -> std::io::Result<Child> {
|
||||
let mut cmd = configure_command!(Command, command, cwd, sandbox_policy, stdio_policy)?;
|
||||
let mut cmd = configure_command!(Command, command, cwd, sandbox_policy, stdio_policy, env)?;
|
||||
cmd.kill_on_drop(true).spawn()
|
||||
}
|
||||
|
||||
@@ -326,13 +367,15 @@ pub fn spawn_child_sync(
|
||||
cwd: PathBuf,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
stdio_policy: StdioPolicy,
|
||||
env: HashMap<String, String>,
|
||||
) -> std::io::Result<std::process::Child> {
|
||||
let mut cmd = configure_command!(
|
||||
std::process::Command,
|
||||
command,
|
||||
cwd,
|
||||
sandbox_policy,
|
||||
stdio_policy
|
||||
stdio_policy,
|
||||
env
|
||||
)?;
|
||||
cmd.spawn()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user