chore: sandbox refactor 2 (#4653)
Revert the revert and fix the UI issue
This commit is contained in:
101
codex-rs/core/src/executor/backends.rs
Normal file
101
codex-rs/core/src/executor/backends.rs
Normal file
@@ -0,0 +1,101 @@
|
||||
use std::collections::HashMap;
|
||||
use std::env;
|
||||
|
||||
use async_trait::async_trait;
|
||||
|
||||
use crate::CODEX_APPLY_PATCH_ARG1;
|
||||
use crate::apply_patch::ApplyPatchExec;
|
||||
use crate::exec::ExecParams;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
|
||||
pub(crate) enum ExecutionMode {
|
||||
Shell,
|
||||
ApplyPatch(ApplyPatchExec),
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
/// Backend-specific hooks that prepare and post-process execution requests for a
|
||||
/// given [`ExecutionMode`].
|
||||
pub(crate) trait ExecutionBackend: Send + Sync {
|
||||
fn prepare(
|
||||
&self,
|
||||
params: ExecParams,
|
||||
// Required for downcasting the apply_patch.
|
||||
mode: &ExecutionMode,
|
||||
) -> Result<ExecParams, FunctionCallError>;
|
||||
|
||||
fn stream_stdout(&self, _mode: &ExecutionMode) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
static SHELL_BACKEND: ShellBackend = ShellBackend;
|
||||
static APPLY_PATCH_BACKEND: ApplyPatchBackend = ApplyPatchBackend;
|
||||
|
||||
pub(crate) fn backend_for_mode(mode: &ExecutionMode) -> &'static dyn ExecutionBackend {
|
||||
match mode {
|
||||
ExecutionMode::Shell => &SHELL_BACKEND,
|
||||
ExecutionMode::ApplyPatch(_) => &APPLY_PATCH_BACKEND,
|
||||
}
|
||||
}
|
||||
|
||||
struct ShellBackend;
|
||||
|
||||
#[async_trait]
|
||||
impl ExecutionBackend for ShellBackend {
|
||||
fn prepare(
|
||||
&self,
|
||||
params: ExecParams,
|
||||
mode: &ExecutionMode,
|
||||
) -> Result<ExecParams, FunctionCallError> {
|
||||
match mode {
|
||||
ExecutionMode::Shell => Ok(params),
|
||||
_ => Err(FunctionCallError::RespondToModel(
|
||||
"shell backend invoked with non-shell mode".to_string(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct ApplyPatchBackend;
|
||||
|
||||
#[async_trait]
|
||||
impl ExecutionBackend for ApplyPatchBackend {
|
||||
fn prepare(
|
||||
&self,
|
||||
params: ExecParams,
|
||||
mode: &ExecutionMode,
|
||||
) -> Result<ExecParams, FunctionCallError> {
|
||||
match mode {
|
||||
ExecutionMode::ApplyPatch(exec) => {
|
||||
let path_to_codex = env::current_exe()
|
||||
.ok()
|
||||
.map(|p| p.to_string_lossy().to_string())
|
||||
.ok_or_else(|| {
|
||||
FunctionCallError::RespondToModel(
|
||||
"failed to determine path to codex executable".to_string(),
|
||||
)
|
||||
})?;
|
||||
|
||||
let patch = exec.action.patch.clone();
|
||||
Ok(ExecParams {
|
||||
command: vec![path_to_codex, CODEX_APPLY_PATCH_ARG1.to_string(), patch],
|
||||
cwd: exec.action.cwd.clone(),
|
||||
timeout_ms: params.timeout_ms,
|
||||
// Run apply_patch with a minimal environment for determinism and to
|
||||
// avoid leaking host environment variables into the patch process.
|
||||
env: HashMap::new(),
|
||||
with_escalated_permissions: params.with_escalated_permissions,
|
||||
justification: params.justification,
|
||||
})
|
||||
}
|
||||
ExecutionMode::Shell => Err(FunctionCallError::RespondToModel(
|
||||
"apply_patch backend invoked without patch context".to_string(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
fn stream_stdout(&self, _mode: &ExecutionMode) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
||||
51
codex-rs/core/src/executor/cache.rs
Normal file
51
codex-rs/core/src/executor/cache.rs
Normal file
@@ -0,0 +1,51 @@
|
||||
use std::collections::HashSet;
|
||||
use std::sync::Arc;
|
||||
use std::sync::Mutex;
|
||||
|
||||
#[derive(Clone, Debug, Default)]
|
||||
/// Thread-safe store of user approvals so repeated commands can reuse
|
||||
/// previously granted trust.
|
||||
pub(crate) struct ApprovalCache {
|
||||
inner: Arc<Mutex<HashSet<Vec<String>>>>,
|
||||
}
|
||||
|
||||
impl ApprovalCache {
|
||||
pub(crate) fn insert(&self, command: Vec<String>) {
|
||||
if command.is_empty() {
|
||||
return;
|
||||
}
|
||||
if let Ok(mut guard) = self.inner.lock() {
|
||||
guard.insert(command);
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn snapshot(&self) -> HashSet<Vec<String>> {
|
||||
self.inner.lock().map(|g| g.clone()).unwrap_or_default()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
#[test]
|
||||
fn insert_ignores_empty_and_dedupes() {
|
||||
let cache = ApprovalCache::default();
|
||||
|
||||
// Empty should be ignored
|
||||
cache.insert(vec![]);
|
||||
assert!(cache.snapshot().is_empty());
|
||||
|
||||
// Insert a command and verify snapshot contains it
|
||||
let cmd = vec!["foo".to_string(), "bar".to_string()];
|
||||
cache.insert(cmd.clone());
|
||||
let snap1 = cache.snapshot();
|
||||
assert!(snap1.contains(&cmd));
|
||||
|
||||
// Reinserting should not create duplicates
|
||||
cache.insert(cmd);
|
||||
let snap2 = cache.snapshot();
|
||||
assert_eq!(snap1, snap2);
|
||||
}
|
||||
}
|
||||
64
codex-rs/core/src/executor/mod.rs
Normal file
64
codex-rs/core/src/executor/mod.rs
Normal file
@@ -0,0 +1,64 @@
|
||||
mod backends;
|
||||
mod cache;
|
||||
mod runner;
|
||||
mod sandbox;
|
||||
|
||||
pub(crate) use backends::ExecutionMode;
|
||||
pub(crate) use runner::ExecutionRequest;
|
||||
pub(crate) use runner::Executor;
|
||||
pub(crate) use runner::ExecutorConfig;
|
||||
pub(crate) use runner::normalize_exec_result;
|
||||
|
||||
pub(crate) mod linkers {
|
||||
use crate::codex::ExecCommandContext;
|
||||
use crate::exec::ExecParams;
|
||||
use crate::exec::StdoutStream;
|
||||
use crate::executor::backends::ExecutionMode;
|
||||
use crate::executor::runner::ExecutionRequest;
|
||||
|
||||
pub struct PreparedExec {
|
||||
pub(crate) context: ExecCommandContext,
|
||||
pub(crate) request: ExecutionRequest,
|
||||
}
|
||||
|
||||
impl PreparedExec {
|
||||
pub fn new(
|
||||
context: ExecCommandContext,
|
||||
params: ExecParams,
|
||||
approval_command: Vec<String>,
|
||||
mode: ExecutionMode,
|
||||
stdout_stream: Option<StdoutStream>,
|
||||
use_shell_profile: bool,
|
||||
) -> Self {
|
||||
let request = ExecutionRequest {
|
||||
params,
|
||||
approval_command,
|
||||
mode,
|
||||
stdout_stream,
|
||||
use_shell_profile,
|
||||
};
|
||||
|
||||
Self { context, request }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub mod errors {
|
||||
use crate::error::CodexErr;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use thiserror::Error;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum ExecError {
|
||||
#[error(transparent)]
|
||||
Function(#[from] FunctionCallError),
|
||||
#[error(transparent)]
|
||||
Codex(#[from] CodexErr),
|
||||
}
|
||||
|
||||
impl ExecError {
|
||||
pub(crate) fn rejection(msg: impl Into<String>) -> Self {
|
||||
FunctionCallError::RespondToModel(msg.into()).into()
|
||||
}
|
||||
}
|
||||
}
|
||||
408
codex-rs/core/src/executor/runner.rs
Normal file
408
codex-rs/core/src/executor/runner.rs
Normal file
@@ -0,0 +1,408 @@
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::sync::RwLock;
|
||||
use std::time::Duration;
|
||||
|
||||
use super::backends::ExecutionMode;
|
||||
use super::backends::backend_for_mode;
|
||||
use super::cache::ApprovalCache;
|
||||
use crate::codex::ExecCommandContext;
|
||||
use crate::codex::Session;
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::SandboxErr;
|
||||
use crate::error::get_error_message_ui;
|
||||
use crate::exec::ExecParams;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::exec::SandboxType;
|
||||
use crate::exec::StdoutStream;
|
||||
use crate::exec::StreamOutput;
|
||||
use crate::exec::process_exec_tool_call;
|
||||
use crate::executor::errors::ExecError;
|
||||
use crate::executor::sandbox::select_sandbox;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::protocol::AskForApproval;
|
||||
use crate::protocol::ReviewDecision;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use crate::shell;
|
||||
use codex_otel::otel_event_manager::ToolDecisionSource;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub(crate) struct ExecutorConfig {
|
||||
pub(crate) sandbox_policy: SandboxPolicy,
|
||||
pub(crate) sandbox_cwd: PathBuf,
|
||||
codex_linux_sandbox_exe: Option<PathBuf>,
|
||||
}
|
||||
|
||||
impl ExecutorConfig {
|
||||
pub(crate) fn new(
|
||||
sandbox_policy: SandboxPolicy,
|
||||
sandbox_cwd: PathBuf,
|
||||
codex_linux_sandbox_exe: Option<PathBuf>,
|
||||
) -> Self {
|
||||
Self {
|
||||
sandbox_policy,
|
||||
sandbox_cwd,
|
||||
codex_linux_sandbox_exe,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Coordinates sandbox selection, backend-specific preparation, and command
|
||||
/// execution for tool calls requested by the model.
|
||||
pub(crate) struct Executor {
|
||||
approval_cache: ApprovalCache,
|
||||
config: Arc<RwLock<ExecutorConfig>>,
|
||||
}
|
||||
|
||||
impl Executor {
|
||||
pub(crate) fn new(config: ExecutorConfig) -> Self {
|
||||
Self {
|
||||
approval_cache: ApprovalCache::default(),
|
||||
config: Arc::new(RwLock::new(config)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Updates the sandbox policy and working directory used for future
|
||||
/// executions without recreating the executor.
|
||||
pub(crate) fn update_environment(&self, sandbox_policy: SandboxPolicy, sandbox_cwd: PathBuf) {
|
||||
if let Ok(mut cfg) = self.config.write() {
|
||||
cfg.sandbox_policy = sandbox_policy;
|
||||
cfg.sandbox_cwd = sandbox_cwd;
|
||||
}
|
||||
}
|
||||
|
||||
/// Runs a prepared execution request end-to-end: prepares parameters, decides on
|
||||
/// sandbox placement (prompting the user when necessary), launches the command,
|
||||
/// and lets the backend post-process the final output.
|
||||
pub(crate) async fn run(
|
||||
&self,
|
||||
mut request: ExecutionRequest,
|
||||
session: &Session,
|
||||
approval_policy: AskForApproval,
|
||||
context: &ExecCommandContext,
|
||||
) -> Result<ExecToolCallOutput, ExecError> {
|
||||
if matches!(request.mode, ExecutionMode::Shell) {
|
||||
request.params =
|
||||
maybe_translate_shell_command(request.params, session, request.use_shell_profile);
|
||||
}
|
||||
|
||||
// Step 1: Normalise parameters via the selected backend.
|
||||
let backend = backend_for_mode(&request.mode);
|
||||
let stdout_stream = if backend.stream_stdout(&request.mode) {
|
||||
request.stdout_stream.clone()
|
||||
} else {
|
||||
None
|
||||
};
|
||||
request.params = backend
|
||||
.prepare(request.params, &request.mode)
|
||||
.map_err(ExecError::from)?;
|
||||
|
||||
// Step 2: Snapshot sandbox configuration so it stays stable for this run.
|
||||
let config = self
|
||||
.config
|
||||
.read()
|
||||
.map_err(|_| ExecError::rejection("executor config poisoned"))?
|
||||
.clone();
|
||||
|
||||
// Step 3: Decide sandbox placement, prompting for approval when needed.
|
||||
let sandbox_decision = select_sandbox(
|
||||
&request,
|
||||
approval_policy,
|
||||
self.approval_cache.snapshot(),
|
||||
&config,
|
||||
session,
|
||||
&context.sub_id,
|
||||
&context.call_id,
|
||||
&context.otel_event_manager,
|
||||
)
|
||||
.await?;
|
||||
if sandbox_decision.record_session_approval {
|
||||
self.approval_cache.insert(request.approval_command.clone());
|
||||
}
|
||||
|
||||
// Step 4: Launch the command within the chosen sandbox.
|
||||
let first_attempt = self
|
||||
.spawn(
|
||||
request.params.clone(),
|
||||
sandbox_decision.initial_sandbox,
|
||||
&config,
|
||||
stdout_stream.clone(),
|
||||
)
|
||||
.await;
|
||||
|
||||
// Step 5: Handle sandbox outcomes, optionally escalating to an unsandboxed retry.
|
||||
match first_attempt {
|
||||
Ok(output) => Ok(output),
|
||||
Err(CodexErr::Sandbox(SandboxErr::Timeout { output })) => {
|
||||
Err(CodexErr::Sandbox(SandboxErr::Timeout { output }).into())
|
||||
}
|
||||
Err(CodexErr::Sandbox(error)) => {
|
||||
if sandbox_decision.escalate_on_failure {
|
||||
self.retry_without_sandbox(
|
||||
&request,
|
||||
&config,
|
||||
session,
|
||||
context,
|
||||
stdout_stream,
|
||||
error,
|
||||
)
|
||||
.await
|
||||
} else {
|
||||
let message = sandbox_failure_message(error);
|
||||
Err(ExecError::rejection(message))
|
||||
}
|
||||
}
|
||||
Err(err) => Err(err.into()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Fallback path invoked when a sandboxed run is denied so the user can
|
||||
/// approve rerunning without isolation.
|
||||
async fn retry_without_sandbox(
|
||||
&self,
|
||||
request: &ExecutionRequest,
|
||||
config: &ExecutorConfig,
|
||||
session: &Session,
|
||||
context: &ExecCommandContext,
|
||||
stdout_stream: Option<StdoutStream>,
|
||||
sandbox_error: SandboxErr,
|
||||
) -> Result<ExecToolCallOutput, ExecError> {
|
||||
session
|
||||
.notify_background_event(
|
||||
&context.sub_id,
|
||||
format!("Execution failed: {sandbox_error}"),
|
||||
)
|
||||
.await;
|
||||
let decision = session
|
||||
.request_command_approval(
|
||||
context.sub_id.to_string(),
|
||||
context.call_id.to_string(),
|
||||
request.approval_command.clone(),
|
||||
request.params.cwd.clone(),
|
||||
Some("command failed; retry without sandbox?".to_string()),
|
||||
)
|
||||
.await;
|
||||
|
||||
context.otel_event_manager.tool_decision(
|
||||
&context.tool_name,
|
||||
&context.call_id,
|
||||
decision,
|
||||
ToolDecisionSource::User,
|
||||
);
|
||||
match decision {
|
||||
ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {
|
||||
if matches!(decision, ReviewDecision::ApprovedForSession) {
|
||||
self.approval_cache.insert(request.approval_command.clone());
|
||||
}
|
||||
session
|
||||
.notify_background_event(&context.sub_id, "retrying command without sandbox")
|
||||
.await;
|
||||
|
||||
let retry_output = self
|
||||
.spawn(
|
||||
request.params.clone(),
|
||||
SandboxType::None,
|
||||
config,
|
||||
stdout_stream,
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(retry_output)
|
||||
}
|
||||
ReviewDecision::Denied | ReviewDecision::Abort => {
|
||||
Err(ExecError::rejection("exec command rejected by user"))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn spawn(
|
||||
&self,
|
||||
params: ExecParams,
|
||||
sandbox: SandboxType,
|
||||
config: &ExecutorConfig,
|
||||
stdout_stream: Option<StdoutStream>,
|
||||
) -> Result<ExecToolCallOutput, CodexErr> {
|
||||
process_exec_tool_call(
|
||||
params,
|
||||
sandbox,
|
||||
&config.sandbox_policy,
|
||||
&config.sandbox_cwd,
|
||||
&config.codex_linux_sandbox_exe,
|
||||
stdout_stream,
|
||||
)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
fn maybe_translate_shell_command(
|
||||
params: ExecParams,
|
||||
session: &Session,
|
||||
use_shell_profile: bool,
|
||||
) -> ExecParams {
|
||||
let should_translate =
|
||||
matches!(session.user_shell(), shell::Shell::PowerShell(_)) || use_shell_profile;
|
||||
|
||||
if should_translate
|
||||
&& let Some(command) = session
|
||||
.user_shell()
|
||||
.format_default_shell_invocation(params.command.clone())
|
||||
{
|
||||
return ExecParams { command, ..params };
|
||||
}
|
||||
|
||||
params
|
||||
}
|
||||
|
||||
fn sandbox_failure_message(error: SandboxErr) -> String {
|
||||
let codex_error = CodexErr::Sandbox(error);
|
||||
let friendly = get_error_message_ui(&codex_error);
|
||||
format!("failed in sandbox: {friendly}")
|
||||
}
|
||||
|
||||
pub(crate) struct ExecutionRequest {
|
||||
pub params: ExecParams,
|
||||
pub approval_command: Vec<String>,
|
||||
pub mode: ExecutionMode,
|
||||
pub stdout_stream: Option<StdoutStream>,
|
||||
pub use_shell_profile: bool,
|
||||
}
|
||||
|
||||
pub(crate) struct NormalizedExecOutput<'a> {
|
||||
borrowed: Option<&'a ExecToolCallOutput>,
|
||||
synthetic: Option<ExecToolCallOutput>,
|
||||
}
|
||||
|
||||
impl<'a> NormalizedExecOutput<'a> {
|
||||
pub(crate) fn event_output(&'a self) -> &'a ExecToolCallOutput {
|
||||
match (self.borrowed, self.synthetic.as_ref()) {
|
||||
(Some(output), _) => output,
|
||||
(None, Some(output)) => output,
|
||||
(None, None) => unreachable!("normalized exec output missing data"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts a raw execution result into a uniform view that always exposes an
|
||||
/// [`ExecToolCallOutput`], synthesizing error output when the command fails
|
||||
/// before producing a response.
|
||||
pub(crate) fn normalize_exec_result(
|
||||
result: &Result<ExecToolCallOutput, ExecError>,
|
||||
) -> NormalizedExecOutput<'_> {
|
||||
match result {
|
||||
Ok(output) => NormalizedExecOutput {
|
||||
borrowed: Some(output),
|
||||
synthetic: None,
|
||||
},
|
||||
Err(ExecError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output }))) => {
|
||||
NormalizedExecOutput {
|
||||
borrowed: Some(output.as_ref()),
|
||||
synthetic: None,
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
let message = match err {
|
||||
ExecError::Function(FunctionCallError::RespondToModel(msg)) => msg.clone(),
|
||||
ExecError::Codex(e) => get_error_message_ui(e),
|
||||
};
|
||||
let synthetic = ExecToolCallOutput {
|
||||
exit_code: -1,
|
||||
stdout: StreamOutput::new(String::new()),
|
||||
stderr: StreamOutput::new(message.clone()),
|
||||
aggregated_output: StreamOutput::new(message),
|
||||
duration: Duration::default(),
|
||||
timed_out: false,
|
||||
};
|
||||
NormalizedExecOutput {
|
||||
borrowed: None,
|
||||
synthetic: Some(synthetic),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::EnvVarError;
|
||||
use crate::error::SandboxErr;
|
||||
use crate::exec::StreamOutput;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
fn make_output(text: &str) -> ExecToolCallOutput {
|
||||
ExecToolCallOutput {
|
||||
exit_code: 1,
|
||||
stdout: StreamOutput::new(String::new()),
|
||||
stderr: StreamOutput::new(String::new()),
|
||||
aggregated_output: StreamOutput::new(text.to_string()),
|
||||
duration: Duration::from_millis(123),
|
||||
timed_out: false,
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn normalize_success_borrows() {
|
||||
let out = make_output("ok");
|
||||
let result: Result<ExecToolCallOutput, ExecError> = Ok(out);
|
||||
let normalized = normalize_exec_result(&result);
|
||||
assert_eq!(normalized.event_output().aggregated_output.text, "ok");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn normalize_timeout_borrows_embedded_output() {
|
||||
let out = make_output("timed out payload");
|
||||
let err = CodexErr::Sandbox(SandboxErr::Timeout {
|
||||
output: Box::new(out),
|
||||
});
|
||||
let result: Result<ExecToolCallOutput, ExecError> = Err(ExecError::Codex(err));
|
||||
let normalized = normalize_exec_result(&result);
|
||||
assert_eq!(
|
||||
normalized.event_output().aggregated_output.text,
|
||||
"timed out payload"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sandbox_failure_message_uses_denied_stderr() {
|
||||
let output = ExecToolCallOutput {
|
||||
exit_code: 101,
|
||||
stdout: StreamOutput::new(String::new()),
|
||||
stderr: StreamOutput::new("sandbox stderr".to_string()),
|
||||
aggregated_output: StreamOutput::new(String::new()),
|
||||
duration: Duration::from_millis(10),
|
||||
timed_out: false,
|
||||
};
|
||||
let err = SandboxErr::Denied {
|
||||
output: Box::new(output),
|
||||
};
|
||||
let message = sandbox_failure_message(err);
|
||||
assert_eq!(message, "failed in sandbox: sandbox stderr");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn normalize_function_error_synthesizes_payload() {
|
||||
let err = FunctionCallError::RespondToModel("boom".to_string());
|
||||
let result: Result<ExecToolCallOutput, ExecError> = Err(ExecError::Function(err));
|
||||
let normalized = normalize_exec_result(&result);
|
||||
assert_eq!(normalized.event_output().aggregated_output.text, "boom");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn normalize_codex_error_synthesizes_user_message() {
|
||||
// Use a simple EnvVar error which formats to a clear message
|
||||
let e = CodexErr::EnvVar(EnvVarError {
|
||||
var: "FOO".to_string(),
|
||||
instructions: Some("set it".to_string()),
|
||||
});
|
||||
let result: Result<ExecToolCallOutput, ExecError> = Err(ExecError::Codex(e));
|
||||
let normalized = normalize_exec_result(&result);
|
||||
assert!(
|
||||
normalized
|
||||
.event_output()
|
||||
.aggregated_output
|
||||
.text
|
||||
.contains("Missing environment variable: `FOO`"),
|
||||
"expected synthesized user-friendly message"
|
||||
);
|
||||
}
|
||||
}
|
||||
405
codex-rs/core/src/executor/sandbox.rs
Normal file
405
codex-rs/core/src/executor/sandbox.rs
Normal file
@@ -0,0 +1,405 @@
|
||||
use crate::apply_patch::ApplyPatchExec;
|
||||
use crate::codex::Session;
|
||||
use crate::exec::SandboxType;
|
||||
use crate::executor::ExecutionMode;
|
||||
use crate::executor::ExecutionRequest;
|
||||
use crate::executor::ExecutorConfig;
|
||||
use crate::executor::errors::ExecError;
|
||||
use crate::safety::SafetyCheck;
|
||||
use crate::safety::assess_command_safety;
|
||||
use crate::safety::assess_patch_safety;
|
||||
use codex_otel::otel_event_manager::OtelEventManager;
|
||||
use codex_otel::otel_event_manager::ToolDecisionSource;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use std::collections::HashSet;
|
||||
|
||||
/// Sandbox placement options selected for an execution run, including whether
|
||||
/// to escalate after failures and whether approvals should persist.
|
||||
pub(crate) struct SandboxDecision {
|
||||
pub(crate) initial_sandbox: SandboxType,
|
||||
pub(crate) escalate_on_failure: bool,
|
||||
pub(crate) record_session_approval: bool,
|
||||
}
|
||||
|
||||
impl SandboxDecision {
|
||||
fn auto(sandbox: SandboxType, escalate_on_failure: bool) -> Self {
|
||||
Self {
|
||||
initial_sandbox: sandbox,
|
||||
escalate_on_failure,
|
||||
record_session_approval: false,
|
||||
}
|
||||
}
|
||||
|
||||
fn user_override(record_session_approval: bool) -> Self {
|
||||
Self {
|
||||
initial_sandbox: SandboxType::None,
|
||||
escalate_on_failure: false,
|
||||
record_session_approval,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn should_escalate_on_failure(approval: AskForApproval, sandbox: SandboxType) -> bool {
|
||||
matches!(
|
||||
(approval, sandbox),
|
||||
(
|
||||
AskForApproval::UnlessTrusted | AskForApproval::OnFailure,
|
||||
SandboxType::MacosSeatbelt | SandboxType::LinuxSeccomp
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/// Determines how a command should be sandboxed, prompting the user when
|
||||
/// policy requires explicit approval.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub async fn select_sandbox(
|
||||
request: &ExecutionRequest,
|
||||
approval_policy: AskForApproval,
|
||||
approval_cache: HashSet<Vec<String>>,
|
||||
config: &ExecutorConfig,
|
||||
session: &Session,
|
||||
sub_id: &str,
|
||||
call_id: &str,
|
||||
otel_event_manager: &OtelEventManager,
|
||||
) -> Result<SandboxDecision, ExecError> {
|
||||
match &request.mode {
|
||||
ExecutionMode::Shell => {
|
||||
select_shell_sandbox(
|
||||
request,
|
||||
approval_policy,
|
||||
approval_cache,
|
||||
config,
|
||||
session,
|
||||
sub_id,
|
||||
call_id,
|
||||
otel_event_manager,
|
||||
)
|
||||
.await
|
||||
}
|
||||
ExecutionMode::ApplyPatch(exec) => {
|
||||
select_apply_patch_sandbox(exec, approval_policy, config)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn select_shell_sandbox(
|
||||
request: &ExecutionRequest,
|
||||
approval_policy: AskForApproval,
|
||||
approved_snapshot: HashSet<Vec<String>>,
|
||||
config: &ExecutorConfig,
|
||||
session: &Session,
|
||||
sub_id: &str,
|
||||
call_id: &str,
|
||||
otel_event_manager: &OtelEventManager,
|
||||
) -> Result<SandboxDecision, ExecError> {
|
||||
let command_for_safety = if request.approval_command.is_empty() {
|
||||
request.params.command.clone()
|
||||
} else {
|
||||
request.approval_command.clone()
|
||||
};
|
||||
|
||||
let safety = assess_command_safety(
|
||||
&command_for_safety,
|
||||
approval_policy,
|
||||
&config.sandbox_policy,
|
||||
&approved_snapshot,
|
||||
request.params.with_escalated_permissions.unwrap_or(false),
|
||||
);
|
||||
|
||||
match safety {
|
||||
SafetyCheck::AutoApprove {
|
||||
sandbox_type,
|
||||
user_explicitly_approved,
|
||||
} => {
|
||||
let mut decision = SandboxDecision::auto(
|
||||
sandbox_type,
|
||||
should_escalate_on_failure(approval_policy, sandbox_type),
|
||||
);
|
||||
if user_explicitly_approved {
|
||||
decision.record_session_approval = true;
|
||||
}
|
||||
let (decision_for_event, source) = if user_explicitly_approved {
|
||||
(ReviewDecision::ApprovedForSession, ToolDecisionSource::User)
|
||||
} else {
|
||||
(ReviewDecision::Approved, ToolDecisionSource::Config)
|
||||
};
|
||||
otel_event_manager.tool_decision("local_shell", call_id, decision_for_event, source);
|
||||
Ok(decision)
|
||||
}
|
||||
SafetyCheck::AskUser => {
|
||||
let decision = session
|
||||
.request_command_approval(
|
||||
sub_id.to_string(),
|
||||
call_id.to_string(),
|
||||
request.approval_command.clone(),
|
||||
request.params.cwd.clone(),
|
||||
request.params.justification.clone(),
|
||||
)
|
||||
.await;
|
||||
|
||||
otel_event_manager.tool_decision(
|
||||
"local_shell",
|
||||
call_id,
|
||||
decision,
|
||||
ToolDecisionSource::User,
|
||||
);
|
||||
match decision {
|
||||
ReviewDecision::Approved => Ok(SandboxDecision::user_override(false)),
|
||||
ReviewDecision::ApprovedForSession => Ok(SandboxDecision::user_override(true)),
|
||||
ReviewDecision::Denied | ReviewDecision::Abort => {
|
||||
Err(ExecError::rejection("exec command rejected by user"))
|
||||
}
|
||||
}
|
||||
}
|
||||
SafetyCheck::Reject { reason } => Err(ExecError::rejection(format!(
|
||||
"exec command rejected: {reason}"
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
fn select_apply_patch_sandbox(
|
||||
exec: &ApplyPatchExec,
|
||||
approval_policy: AskForApproval,
|
||||
config: &ExecutorConfig,
|
||||
) -> Result<SandboxDecision, ExecError> {
|
||||
if exec.user_explicitly_approved_this_action {
|
||||
return Ok(SandboxDecision::user_override(false));
|
||||
}
|
||||
|
||||
match assess_patch_safety(
|
||||
&exec.action,
|
||||
approval_policy,
|
||||
&config.sandbox_policy,
|
||||
&config.sandbox_cwd,
|
||||
) {
|
||||
SafetyCheck::AutoApprove { sandbox_type, .. } => Ok(SandboxDecision::auto(
|
||||
sandbox_type,
|
||||
should_escalate_on_failure(approval_policy, sandbox_type),
|
||||
)),
|
||||
SafetyCheck::AskUser => Err(ExecError::rejection(
|
||||
"patch requires approval but none was recorded",
|
||||
)),
|
||||
SafetyCheck::Reject { reason } => {
|
||||
Err(ExecError::rejection(format!("patch rejected: {reason}")))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::codex::make_session_and_context;
|
||||
use crate::exec::ExecParams;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use codex_apply_patch::ApplyPatchAction;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
#[tokio::test]
|
||||
async fn select_apply_patch_user_override_when_explicit() {
|
||||
let (session, ctx) = make_session_and_context();
|
||||
let tmp = tempfile::tempdir().expect("tmp");
|
||||
let p = tmp.path().join("a.txt");
|
||||
let action = ApplyPatchAction::new_add_for_test(&p, "hello".to_string());
|
||||
let exec = ApplyPatchExec {
|
||||
action,
|
||||
user_explicitly_approved_this_action: true,
|
||||
};
|
||||
let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None);
|
||||
let request = ExecutionRequest {
|
||||
params: ExecParams {
|
||||
command: vec!["apply_patch".into()],
|
||||
cwd: std::env::temp_dir(),
|
||||
timeout_ms: None,
|
||||
env: std::collections::HashMap::new(),
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
},
|
||||
approval_command: vec!["apply_patch".into()],
|
||||
mode: ExecutionMode::ApplyPatch(exec),
|
||||
stdout_stream: None,
|
||||
use_shell_profile: false,
|
||||
};
|
||||
let otel_event_manager = ctx.client.get_otel_event_manager();
|
||||
let decision = select_sandbox(
|
||||
&request,
|
||||
AskForApproval::OnRequest,
|
||||
Default::default(),
|
||||
&cfg,
|
||||
&session,
|
||||
"sub",
|
||||
"call",
|
||||
&otel_event_manager,
|
||||
)
|
||||
.await
|
||||
.expect("ok");
|
||||
// Explicit user override runs without sandbox
|
||||
assert_eq!(decision.initial_sandbox, SandboxType::None);
|
||||
assert_eq!(decision.escalate_on_failure, false);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn select_apply_patch_autoapprove_in_danger() {
|
||||
let (session, ctx) = make_session_and_context();
|
||||
let tmp = tempfile::tempdir().expect("tmp");
|
||||
let p = tmp.path().join("a.txt");
|
||||
let action = ApplyPatchAction::new_add_for_test(&p, "hello".to_string());
|
||||
let exec = ApplyPatchExec {
|
||||
action,
|
||||
user_explicitly_approved_this_action: false,
|
||||
};
|
||||
let cfg = ExecutorConfig::new(SandboxPolicy::DangerFullAccess, std::env::temp_dir(), None);
|
||||
let request = ExecutionRequest {
|
||||
params: ExecParams {
|
||||
command: vec!["apply_patch".into()],
|
||||
cwd: std::env::temp_dir(),
|
||||
timeout_ms: None,
|
||||
env: std::collections::HashMap::new(),
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
},
|
||||
approval_command: vec!["apply_patch".into()],
|
||||
mode: ExecutionMode::ApplyPatch(exec),
|
||||
stdout_stream: None,
|
||||
use_shell_profile: false,
|
||||
};
|
||||
let otel_event_manager = ctx.client.get_otel_event_manager();
|
||||
let decision = select_sandbox(
|
||||
&request,
|
||||
AskForApproval::OnRequest,
|
||||
Default::default(),
|
||||
&cfg,
|
||||
&session,
|
||||
"sub",
|
||||
"call",
|
||||
&otel_event_manager,
|
||||
)
|
||||
.await
|
||||
.expect("ok");
|
||||
// On platforms with a sandbox, DangerFullAccess still prefers it
|
||||
let expected = crate::safety::get_platform_sandbox().unwrap_or(SandboxType::None);
|
||||
assert_eq!(decision.initial_sandbox, expected);
|
||||
assert_eq!(decision.escalate_on_failure, false);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn select_apply_patch_requires_approval_on_unless_trusted() {
|
||||
let (session, ctx) = make_session_and_context();
|
||||
let tempdir = tempfile::tempdir().expect("tmpdir");
|
||||
let p = tempdir.path().join("a.txt");
|
||||
let action = ApplyPatchAction::new_add_for_test(&p, "hello".to_string());
|
||||
let exec = ApplyPatchExec {
|
||||
action,
|
||||
user_explicitly_approved_this_action: false,
|
||||
};
|
||||
let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None);
|
||||
let request = ExecutionRequest {
|
||||
params: ExecParams {
|
||||
command: vec!["apply_patch".into()],
|
||||
cwd: std::env::temp_dir(),
|
||||
timeout_ms: None,
|
||||
env: std::collections::HashMap::new(),
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
},
|
||||
approval_command: vec!["apply_patch".into()],
|
||||
mode: ExecutionMode::ApplyPatch(exec),
|
||||
stdout_stream: None,
|
||||
use_shell_profile: false,
|
||||
};
|
||||
let otel_event_manager = ctx.client.get_otel_event_manager();
|
||||
let result = select_sandbox(
|
||||
&request,
|
||||
AskForApproval::UnlessTrusted,
|
||||
Default::default(),
|
||||
&cfg,
|
||||
&session,
|
||||
"sub",
|
||||
"call",
|
||||
&otel_event_manager,
|
||||
)
|
||||
.await;
|
||||
match result {
|
||||
Ok(_) => panic!("expected error"),
|
||||
Err(ExecError::Function(FunctionCallError::RespondToModel(msg))) => {
|
||||
assert!(msg.contains("requires approval"))
|
||||
}
|
||||
Err(other) => panic!("unexpected error: {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn select_shell_autoapprove_in_danger_mode() {
|
||||
let (session, ctx) = make_session_and_context();
|
||||
let cfg = ExecutorConfig::new(SandboxPolicy::DangerFullAccess, std::env::temp_dir(), None);
|
||||
let request = ExecutionRequest {
|
||||
params: ExecParams {
|
||||
command: vec!["some-unknown".into()],
|
||||
cwd: std::env::temp_dir(),
|
||||
timeout_ms: None,
|
||||
env: std::collections::HashMap::new(),
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
},
|
||||
approval_command: vec!["some-unknown".into()],
|
||||
mode: ExecutionMode::Shell,
|
||||
stdout_stream: None,
|
||||
use_shell_profile: false,
|
||||
};
|
||||
let otel_event_manager = ctx.client.get_otel_event_manager();
|
||||
let decision = select_sandbox(
|
||||
&request,
|
||||
AskForApproval::OnRequest,
|
||||
Default::default(),
|
||||
&cfg,
|
||||
&session,
|
||||
"sub",
|
||||
"call",
|
||||
&otel_event_manager,
|
||||
)
|
||||
.await
|
||||
.expect("ok");
|
||||
assert_eq!(decision.initial_sandbox, SandboxType::None);
|
||||
assert_eq!(decision.escalate_on_failure, false);
|
||||
}
|
||||
|
||||
#[cfg(any(target_os = "macos", target_os = "linux"))]
|
||||
#[tokio::test]
|
||||
async fn select_shell_escalates_on_failure_with_platform_sandbox() {
|
||||
let (session, ctx) = make_session_and_context();
|
||||
let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None);
|
||||
let request = ExecutionRequest {
|
||||
params: ExecParams {
|
||||
// Unknown command => untrusted but not flagged dangerous
|
||||
command: vec!["some-unknown".into()],
|
||||
cwd: std::env::temp_dir(),
|
||||
timeout_ms: None,
|
||||
env: std::collections::HashMap::new(),
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
},
|
||||
approval_command: vec!["some-unknown".into()],
|
||||
mode: ExecutionMode::Shell,
|
||||
stdout_stream: None,
|
||||
use_shell_profile: false,
|
||||
};
|
||||
let otel_event_manager = ctx.client.get_otel_event_manager();
|
||||
let decision = select_sandbox(
|
||||
&request,
|
||||
AskForApproval::OnFailure,
|
||||
Default::default(),
|
||||
&cfg,
|
||||
&session,
|
||||
"sub",
|
||||
"call",
|
||||
&otel_event_manager,
|
||||
)
|
||||
.await
|
||||
.expect("ok");
|
||||
// On macOS/Linux we should have a platform sandbox and escalate on failure
|
||||
assert_ne!(decision.initial_sandbox, SandboxType::None);
|
||||
assert_eq!(decision.escalate_on_failure, true);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user