chore: rework tools execution workflow (#5278)

Re-work the tool execution flow. Read `orchestrator.rs` to understand
the structure
This commit is contained in:
jif-oai
2025-10-20 20:57:37 +01:00
committed by GitHub
parent c84fc83222
commit 5e4f3bbb0b
59 changed files with 2630 additions and 3374 deletions

11
codex-rs/Cargo.lock generated
View File

@@ -1056,6 +1056,7 @@ dependencies = [
"codex-otel",
"codex-protocol",
"codex-rmcp-client",
"codex-utils-pty",
"codex-utils-string",
"core-foundation 0.9.4",
"core_test_support",
@@ -1072,7 +1073,6 @@ dependencies = [
"mcp-types",
"openssl-sys",
"os_info",
"portable-pty",
"predicates",
"pretty_assertions",
"rand 0.9.2",
@@ -1484,6 +1484,15 @@ dependencies = [
"toml",
]
[[package]]
name = "codex-utils-pty"
version = "0.0.0"
dependencies = [
"anyhow",
"portable-pty",
"tokio",
]
[[package]]
name = "codex-utils-readiness"
version = "0.0.0"

View File

@@ -35,6 +35,7 @@ members = [
"git-apply",
"utils/json-to-toml",
"utils/readiness",
"utils/pty",
"utils/string",
]
resolver = "2"
@@ -78,6 +79,7 @@ codex-stdio-to-uds = { path = "stdio-to-uds" }
codex-tui = { path = "tui" }
codex-utils-json-to-toml = { path = "utils/json-to-toml" }
codex-utils-readiness = { path = "utils/readiness" }
codex-utils-pty = { path = "utils/pty" }
codex-utils-string = { path = "utils/string" }
core_test_support = { path = "core/tests/common" }
mcp-types = { path = "mcp-types" }

View File

@@ -631,6 +631,7 @@ impl CodexMessageProcessor {
env,
with_escalated_permissions: None,
justification: None,
arg0: None,
};
let effective_policy = params

View File

@@ -28,6 +28,7 @@ codex-protocol = { workspace = true }
codex-rmcp-client = { workspace = true }
codex-async-utils = { workspace = true }
codex-utils-string = { workspace = true }
codex-utils-pty = { workspace = true }
dirs = { workspace = true }
dunce = { workspace = true }
env-flags = { workspace = true }
@@ -37,7 +38,6 @@ indexmap = { workspace = true }
libc = { workspace = true }
mcp-types = { workspace = true }
os_info = { workspace = true }
portable-pty = { workspace = true }
rand = { workspace = true }
regex-lite = { workspace = true }
reqwest = { workspace = true, features = ["json", "stream"] }

View File

@@ -11,7 +11,7 @@ use crate::error::Result;
use crate::error::RetryLimitReachedError;
use crate::error::UnexpectedResponseError;
use crate::model_family::ModelFamily;
use crate::openai_tools::create_tools_json_for_chat_completions_api;
use crate::tools::spec::create_tools_json_for_chat_completions_api;
use crate::util::backoff;
use bytes::Bytes;
use codex_otel::otel_event_manager::OtelEventManager;

View File

@@ -45,12 +45,12 @@ use crate::model_family::ModelFamily;
use crate::model_provider_info::ModelProviderInfo;
use crate::model_provider_info::WireApi;
use crate::openai_model_info::get_model_info;
use crate::openai_tools::create_tools_json_for_responses_api;
use crate::protocol::RateLimitSnapshot;
use crate::protocol::RateLimitWindow;
use crate::protocol::TokenUsage;
use crate::state::TaskKind;
use crate::token_data::PlanType;
use crate::tools::spec::create_tools_json_for_responses_api;
use crate::util::backoff;
use chrono::DateTime;
use chrono::Utc;

View File

@@ -281,7 +281,7 @@ pub(crate) struct ResponsesApiRequest<'a> {
}
pub(crate) mod tools {
use crate::openai_tools::JsonSchema;
use crate::tools::spec::JsonSchema;
use serde::Deserialize;
use serde::Serialize;

View File

@@ -8,6 +8,7 @@ use crate::AuthManager;
use crate::client_common::REVIEW_PROMPT;
use crate::event_mapping::map_response_item_to_event_messages;
use crate::function_tool::FunctionCallError;
use crate::parse_command::parse_command;
use crate::review_format::format_review_findings_block;
use crate::terminal;
use crate::user_notification::UserNotifier;
@@ -56,22 +57,14 @@ use crate::conversation_history::ConversationHistory;
use crate::environment_context::EnvironmentContext;
use crate::error::CodexErr;
use crate::error::Result as CodexResult;
use crate::exec::ExecToolCallOutput;
#[cfg(test)]
use crate::exec::StreamOutput;
use crate::exec_command::ExecCommandParams;
use crate::exec_command::ExecSessionManager;
use crate::exec_command::WriteStdinParams;
use crate::executor::Executor;
use crate::executor::ExecutorConfig;
use crate::executor::normalize_exec_result;
// Removed: legacy executor wiring replaced by ToolOrchestrator flows.
// legacy normalize_exec_result no longer used after orchestrator migration
use crate::mcp::auth::compute_auth_statuses;
use crate::mcp_connection_manager::McpConnectionManager;
use crate::model_family::find_family_for_model;
use crate::openai_model_info::get_model_info;
use crate::openai_tools::ToolsConfig;
use crate::openai_tools::ToolsConfigParams;
use crate::parse_command::parse_command;
use crate::project_doc::get_user_instructions;
use crate::protocol::AgentMessageDeltaEvent;
use crate::protocol::AgentReasoningDeltaEvent;
@@ -84,13 +77,9 @@ use crate::protocol::ErrorEvent;
use crate::protocol::Event;
use crate::protocol::EventMsg;
use crate::protocol::ExecApprovalRequestEvent;
use crate::protocol::ExecCommandBeginEvent;
use crate::protocol::ExecCommandEndEvent;
use crate::protocol::InputItem;
use crate::protocol::ListCustomPromptsResponseEvent;
use crate::protocol::Op;
use crate::protocol::PatchApplyBeginEvent;
use crate::protocol::PatchApplyEndEvent;
use crate::protocol::RateLimitSnapshot;
use crate::protocol::ReviewDecision;
use crate::protocol::ReviewOutputEvent;
@@ -114,8 +103,10 @@ use crate::tasks::RegularTask;
use crate::tasks::ReviewTask;
use crate::tools::ToolRouter;
use crate::tools::context::SharedTurnDiffTracker;
use crate::tools::format_exec_output_str;
use crate::tools::parallel::ToolCallRuntime;
use crate::tools::sandboxing::ApprovalStore;
use crate::tools::spec::ToolsConfig;
use crate::tools::spec::ToolsConfigParams;
use crate::turn_diff_tracker::TurnDiffTracker;
use crate::unified_exec::UnifiedExecSessionManager;
use crate::user_instructions::UserInstructions;
@@ -272,6 +263,7 @@ pub(crate) struct TurnContext {
pub(crate) tools_config: ToolsConfig,
pub(crate) is_review_mode: bool,
pub(crate) final_output_json_schema: Option<Value>,
pub(crate) codex_linux_sandbox_exe: Option<PathBuf>,
}
impl TurnContext {
@@ -404,6 +396,7 @@ impl Session {
tools_config,
is_review_mode: false,
final_output_json_schema: None,
codex_linux_sandbox_exe: config.codex_linux_sandbox_exe.clone(),
}
}
@@ -561,19 +554,14 @@ impl Session {
let services = SessionServices {
mcp_connection_manager,
session_manager: ExecSessionManager::default(),
unified_exec_manager: UnifiedExecSessionManager::default(),
notifier: UserNotifier::new(config.notify.clone()),
rollout: Mutex::new(Some(rollout_recorder)),
user_shell: default_shell,
show_raw_agent_reasoning: config.show_raw_agent_reasoning,
executor: Executor::new(ExecutorConfig::new(
session_configuration.sandbox_policy.clone(),
session_configuration.cwd.clone(),
config.codex_linux_sandbox_exe.clone(),
)),
auth_manager: Arc::clone(&auth_manager),
otel_event_manager,
tool_approvals: Mutex::new(ApprovalStore::default()),
};
let sess = Arc::new(Session {
@@ -655,13 +643,12 @@ impl Session {
}
pub(crate) async fn new_turn(&self, updates: SessionSettingsUpdate) -> Arc<TurnContext> {
let current_configuration = self.state.lock().await.session_configuration.clone();
let session_configuration = current_configuration.apply(&updates);
self.services.executor.update_environment(
session_configuration.sandbox_policy.clone(),
session_configuration.cwd.clone(),
);
let session_configuration = {
let mut state = self.state.lock().await;
let session_configuration = state.session_configuration.clone().apply(&updates);
state.session_configuration = session_configuration.clone();
session_configuration
};
let mut turn_context: TurnContext = Self::make_turn_context(
Some(Arc::clone(&self.services.auth_manager)),
@@ -965,168 +952,6 @@ impl Session {
}
}
async fn on_exec_command_begin(
&self,
turn_diff_tracker: SharedTurnDiffTracker,
exec_command_context: ExecCommandContext,
) {
let ExecCommandContext {
sub_id,
call_id,
command_for_display,
cwd,
apply_patch,
..
} = exec_command_context;
let msg = match apply_patch {
Some(ApplyPatchCommandContext {
user_explicitly_approved_this_action,
changes,
}) => {
{
let mut tracker = turn_diff_tracker.lock().await;
tracker.on_patch_begin(&changes);
}
EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
call_id,
auto_approved: !user_explicitly_approved_this_action,
changes,
})
}
None => EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
call_id,
command: command_for_display.clone(),
cwd,
parsed_cmd: parse_command(&command_for_display),
}),
};
let event = Event {
id: sub_id.to_string(),
msg,
};
self.send_event(event).await;
}
async fn on_exec_command_end(
&self,
turn_diff_tracker: SharedTurnDiffTracker,
sub_id: &str,
call_id: &str,
output: &ExecToolCallOutput,
is_apply_patch: bool,
) {
let ExecToolCallOutput {
stdout,
stderr,
aggregated_output,
duration,
exit_code,
timed_out: _,
..
} = output;
// Send full stdout/stderr to clients; do not truncate.
let stdout = stdout.text.clone();
let stderr = stderr.text.clone();
let formatted_output = format_exec_output_str(output);
let aggregated_output: String = aggregated_output.text.clone();
let msg = if is_apply_patch {
EventMsg::PatchApplyEnd(PatchApplyEndEvent {
call_id: call_id.to_string(),
stdout,
stderr,
success: *exit_code == 0,
})
} else {
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
call_id: call_id.to_string(),
stdout,
stderr,
aggregated_output,
exit_code: *exit_code,
duration: *duration,
formatted_output,
})
};
let event = Event {
id: sub_id.to_string(),
msg,
};
self.send_event(event).await;
// If this is an apply_patch, after we emit the end patch, emit a second event
// with the full turn diff if there is one.
if is_apply_patch {
let unified_diff = {
let mut tracker = turn_diff_tracker.lock().await;
tracker.get_unified_diff()
};
if let Ok(Some(unified_diff)) = unified_diff {
let msg = EventMsg::TurnDiff(TurnDiffEvent { unified_diff });
let event = Event {
id: sub_id.into(),
msg,
};
self.send_event(event).await;
}
}
}
/// Runs the exec tool call and emits events for the begin and end of the
/// command even on error.
///
/// Returns the output of the exec tool call.
pub(crate) async fn run_exec_with_events(
&self,
turn_diff_tracker: SharedTurnDiffTracker,
prepared: PreparedExec,
approval_policy: AskForApproval,
) -> Result<ExecToolCallOutput, ExecError> {
let PreparedExec { context, request } = prepared;
let is_apply_patch = context.apply_patch.is_some();
let sub_id = context.sub_id.clone();
let call_id = context.call_id.clone();
let begin_turn_diff = turn_diff_tracker.clone();
let begin_context = context.clone();
let session = self;
let result = self
.services
.executor
.run(request, self, approval_policy, &context, move || {
let turn_diff = begin_turn_diff.clone();
let ctx = begin_context.clone();
async move {
session.on_exec_command_begin(turn_diff, ctx).await;
}
})
.await;
if matches!(
&result,
Err(ExecError::Function(FunctionCallError::Denied(_)))
) {
return result;
}
let normalized = normalize_exec_result(&result);
let borrowed = normalized.event_output();
self.on_exec_command_end(
turn_diff_tracker,
&sub_id,
&call_id,
borrowed,
is_apply_patch,
)
.await;
drop(normalized);
result
}
/// Helper that emits a BackgroundEvent with the given message. This keeps
/// the callsites terse so adding more diagnostics does not clutter the
/// core agent logic.
@@ -1235,43 +1060,6 @@ impl Session {
.parse_tool_name(tool_name)
}
pub(crate) async fn handle_exec_command_tool(
&self,
params: ExecCommandParams,
) -> Result<String, FunctionCallError> {
let result = self
.services
.session_manager
.handle_exec_command_request(params)
.await;
match result {
Ok(output) => Ok(output.to_text_output()),
Err(err) => Err(FunctionCallError::RespondToModel(err)),
}
}
pub(crate) async fn handle_write_stdin_tool(
&self,
params: WriteStdinParams,
) -> Result<String, FunctionCallError> {
self.services
.session_manager
.handle_write_stdin_request(params)
.await
.map(|output| output.to_text_output())
.map_err(FunctionCallError::RespondToModel)
}
pub(crate) async fn run_unified_exec_request(
&self,
request: crate::unified_exec::UnifiedExecRequest<'_>,
) -> Result<crate::unified_exec::UnifiedExecResult, crate::unified_exec::UnifiedExecError> {
self.services
.unified_exec_manager
.handle_request(request)
.await
}
pub async fn interrupt_task(self: &Arc<Self>) {
info!("interrupt received: abort current task, if any");
self.abort_all_tasks(TurnAbortReason::Interrupted).await;
@@ -1633,6 +1421,7 @@ async fn spawn_review_thread(
cwd: parent_turn_context.cwd.clone(),
is_review_mode: true,
final_output_json_schema: None,
codex_linux_sandbox_exe: parent_turn_context.codex_linux_sandbox_exe.clone(),
};
// Seed the child task with the review prompt as the initial user message.
@@ -2511,10 +2300,6 @@ fn is_mcp_client_auth_required_error(error: &anyhow::Error) -> bool {
error.to_string().contains("Auth required")
}
use crate::executor::errors::ExecError;
use crate::executor::linkers::PreparedExec;
use crate::tools::context::ApplyPatchCommandContext;
use crate::tools::context::ExecCommandContext;
#[cfg(test)]
pub(crate) use tests::make_session_and_context;
@@ -2523,6 +2308,8 @@ mod tests {
use super::*;
use crate::config::ConfigOverrides;
use crate::config::ConfigToml;
use crate::exec::ExecToolCallOutput;
use crate::tools::format_exec_output_str;
use crate::protocol::CompactedItem;
use crate::protocol::InitialHistory;
@@ -2827,19 +2614,14 @@ mod tests {
let services = SessionServices {
mcp_connection_manager: McpConnectionManager::default(),
session_manager: ExecSessionManager::default(),
unified_exec_manager: UnifiedExecSessionManager::default(),
notifier: UserNotifier::new(None),
rollout: Mutex::new(None),
user_shell: shell::Shell::Unknown,
show_raw_agent_reasoning: config.show_raw_agent_reasoning,
executor: Executor::new(ExecutorConfig::new(
session_configuration.sandbox_policy.clone(),
session_configuration.cwd.clone(),
None,
)),
auth_manager: Arc::clone(&auth_manager),
otel_event_manager: otel_event_manager.clone(),
tool_approvals: Mutex::new(ApprovalStore::default()),
};
let session = Session {
@@ -2898,19 +2680,14 @@ mod tests {
let services = SessionServices {
mcp_connection_manager: McpConnectionManager::default(),
session_manager: ExecSessionManager::default(),
unified_exec_manager: UnifiedExecSessionManager::default(),
notifier: UserNotifier::new(None),
rollout: Mutex::new(None),
user_shell: shell::Shell::Unknown,
show_raw_agent_reasoning: config.show_raw_agent_reasoning,
executor: Executor::new(ExecutorConfig::new(
session_configuration.sandbox_policy.clone(),
session_configuration.cwd.clone(),
None,
)),
auth_manager: Arc::clone(&auth_manager),
otel_event_manager: otel_event_manager.clone(),
tool_approvals: Mutex::new(ApprovalStore::default()),
};
let session = Arc::new(Session {
@@ -3258,6 +3035,7 @@ mod tests {
env: HashMap::new(),
with_escalated_permissions: Some(true),
justification: Some("test".to_string()),
arg0: None,
};
let params2 = ExecParams {

View File

@@ -1,15 +1,25 @@
use crate::bash::parse_bash_lc_plain_commands;
pub fn is_known_safe_command(command: &[String]) -> bool {
let command: Vec<String> = command
.iter()
.map(|s| {
if s == "zsh" {
"bash".to_string()
} else {
s.clone()
}
})
.collect();
#[cfg(target_os = "windows")]
{
use super::windows_safe_commands::is_safe_command_windows;
if is_safe_command_windows(command) {
if is_safe_command_windows(&command) {
return true;
}
}
if is_safe_to_call_with_exec(command) {
if is_safe_to_call_with_exec(&command) {
return true;
}
@@ -19,7 +29,7 @@ pub fn is_known_safe_command(command: &[String]) -> bool {
// introduce side effects ( "&&", "||", ";", and "|" ). If every
// individual command in the script is itself a knownsafe command, then
// the composite expression is considered safe.
if let Some(all_commands) = parse_bash_lc_plain_commands(command)
if let Some(all_commands) = parse_bash_lc_plain_commands(&command)
&& !all_commands.is_empty()
&& all_commands
.iter()
@@ -31,9 +41,14 @@ pub fn is_known_safe_command(command: &[String]) -> bool {
}
fn is_safe_to_call_with_exec(command: &[String]) -> bool {
let cmd0 = command.first().map(String::as_str);
let Some(cmd0) = command.first().map(String::as_str) else {
return false;
};
match cmd0 {
match std::path::Path::new(&cmd0)
.file_name()
.and_then(|osstr| osstr.to_str())
{
#[rustfmt::skip]
Some(
"cat" |
@@ -103,13 +118,12 @@ fn is_safe_to_call_with_exec(command: &[String]) -> bool {
// Rust
Some("cargo") if command.get(1).map(String::as_str) == Some("check") => true,
// Special-case `sed -n {N|M,N}p FILE`
// Special-case `sed -n {N|M,N}p`
Some("sed")
if {
command.len() == 4
command.len() <= 4
&& command.get(1).map(String::as_str) == Some("-n")
&& is_valid_sed_n_arg(command.get(2).map(String::as_str))
&& command.get(3).map(String::is_empty) == Some(false)
} =>
{
true

View File

@@ -18,13 +18,14 @@ use tokio::process::Child;
use crate::error::CodexErr;
use crate::error::Result;
use crate::error::SandboxErr;
use crate::landlock::spawn_command_under_linux_sandbox;
use crate::protocol::Event;
use crate::protocol::EventMsg;
use crate::protocol::ExecCommandOutputDeltaEvent;
use crate::protocol::ExecOutputStream;
use crate::protocol::SandboxPolicy;
use crate::seatbelt::spawn_command_under_seatbelt;
use crate::sandboxing::CommandSpec;
use crate::sandboxing::ExecEnv;
use crate::sandboxing::SandboxManager;
use crate::spawn::StdioPolicy;
use crate::spawn::spawn_child_async;
@@ -53,6 +54,7 @@ pub struct ExecParams {
pub env: HashMap<String, String>,
pub with_escalated_permissions: Option<bool>,
pub justification: Option<String>,
pub arg0: Option<String>,
}
impl ExecParams {
@@ -87,57 +89,85 @@ pub async fn process_exec_tool_call(
codex_linux_sandbox_exe: &Option<PathBuf>,
stdout_stream: Option<StdoutStream>,
) -> Result<ExecToolCallOutput> {
let start = Instant::now();
let ExecParams {
command,
cwd,
timeout_ms,
env,
with_escalated_permissions,
justification,
arg0: _,
} = params;
let timeout_duration = params.timeout_duration();
let (program, args) = command.split_first().ok_or_else(|| {
CodexErr::Io(io::Error::new(
io::ErrorKind::InvalidInput,
"command args are empty",
))
})?;
let raw_output_result: std::result::Result<RawExecToolCallOutput, CodexErr> = match sandbox_type
{
SandboxType::None => exec(params, sandbox_policy, stdout_stream.clone()).await,
SandboxType::MacosSeatbelt => {
let ExecParams {
command,
cwd: command_cwd,
env,
..
} = params;
let child = spawn_command_under_seatbelt(
command,
command_cwd,
sandbox_policy,
sandbox_cwd,
StdioPolicy::RedirectForShellTool,
env,
)
.await?;
consume_truncated_output(child, timeout_duration, stdout_stream.clone()).await
}
SandboxType::LinuxSeccomp => {
let ExecParams {
command,
cwd: command_cwd,
env,
..
} = params;
let codex_linux_sandbox_exe = codex_linux_sandbox_exe
.as_ref()
.ok_or(CodexErr::LandlockSandboxExecutableNotProvided)?;
let child = spawn_command_under_linux_sandbox(
codex_linux_sandbox_exe,
command,
command_cwd,
sandbox_policy,
sandbox_cwd,
StdioPolicy::RedirectForShellTool,
env,
)
.await?;
consume_truncated_output(child, timeout_duration, stdout_stream).await
}
let spec = CommandSpec {
program: program.clone(),
args: args.to_vec(),
cwd,
env,
timeout_ms,
with_escalated_permissions,
justification,
};
let manager = SandboxManager::new();
let exec_env = manager
.transform(
&spec,
sandbox_policy,
sandbox_type,
sandbox_cwd,
codex_linux_sandbox_exe.as_ref(),
)
.map_err(CodexErr::from)?;
// Route through the sandboxing module for a single, unified execution path.
crate::sandboxing::execute_env(&exec_env, sandbox_policy, stdout_stream).await
}
pub(crate) async fn execute_exec_env(
env: ExecEnv,
sandbox_policy: &SandboxPolicy,
stdout_stream: Option<StdoutStream>,
) -> Result<ExecToolCallOutput> {
let ExecEnv {
command,
cwd,
env,
timeout_ms,
sandbox,
with_escalated_permissions,
justification,
arg0,
} = env;
let params = ExecParams {
command,
cwd,
timeout_ms,
env,
with_escalated_permissions,
justification,
arg0,
};
let start = Instant::now();
let raw_output_result = exec(params, sandbox_policy, stdout_stream).await;
let duration = start.elapsed();
finalize_exec_result(raw_output_result, sandbox, duration)
}
fn finalize_exec_result(
raw_output_result: std::result::Result<RawExecToolCallOutput, CodexErr>,
sandbox_type: SandboxType,
duration: Duration,
) -> Result<ExecToolCallOutput> {
match raw_output_result {
Ok(raw_output) => {
#[allow(unused_mut)]
@@ -192,12 +222,30 @@ pub async fn process_exec_tool_call(
}
}
pub(crate) mod errors {
use super::CodexErr;
use crate::sandboxing::SandboxTransformError;
impl From<SandboxTransformError> for CodexErr {
fn from(err: SandboxTransformError) -> Self {
match err {
SandboxTransformError::MissingLinuxSandboxExecutable => {
CodexErr::LandlockSandboxExecutableNotProvided
}
}
}
}
}
/// We don't have a fully deterministic way to tell if our command failed
/// because of the sandbox - a command in the user's zshrc file might hit an
/// error, but the command itself might fail or succeed for other reasons.
/// For now, we conservatively check for well known command failure exit codes and
/// also look for common sandbox denial keywords in the command output.
fn is_likely_sandbox_denied(sandbox_type: SandboxType, exec_output: &ExecToolCallOutput) -> bool {
pub(crate) fn is_likely_sandbox_denied(
sandbox_type: SandboxType,
exec_output: &ExecToolCallOutput,
) -> bool {
if sandbox_type == SandboxType::None || exec_output.exit_code == 0 {
return false;
}
@@ -206,21 +254,17 @@ fn is_likely_sandbox_denied(sandbox_type: SandboxType, exec_output: &ExecToolCal
// 2: misuse of shell builtins
// 126: permission denied
// 127: command not found
const QUICK_REJECT_EXIT_CODES: [i32; 3] = [2, 126, 127];
if QUICK_REJECT_EXIT_CODES.contains(&exec_output.exit_code) {
return false;
}
const SANDBOX_DENIED_KEYWORDS: [&str; 6] = [
const SANDBOX_DENIED_KEYWORDS: [&str; 7] = [
"operation not permitted",
"permission denied",
"read-only file system",
"seccomp",
"sandbox",
"landlock",
"failed to write file",
];
if [
let has_sandbox_keyword = [
&exec_output.stderr.text,
&exec_output.stdout.text,
&exec_output.aggregated_output.text,
@@ -231,10 +275,17 @@ fn is_likely_sandbox_denied(sandbox_type: SandboxType, exec_output: &ExecToolCal
SANDBOX_DENIED_KEYWORDS
.iter()
.any(|needle| lower.contains(needle))
}) {
});
if has_sandbox_keyword {
return true;
}
const QUICK_REJECT_EXIT_CODES: [i32; 3] = [2, 126, 127];
if QUICK_REJECT_EXIT_CODES.contains(&exec_output.exit_code) {
return false;
}
#[cfg(unix)]
{
const SIGSYS_CODE: i32 = libc::SIGSYS;
@@ -248,11 +299,12 @@ fn is_likely_sandbox_denied(sandbox_type: SandboxType, exec_output: &ExecToolCal
false
}
#[derive(Debug)]
pub struct StreamOutput<T> {
#[derive(Debug, Clone)]
pub struct StreamOutput<T: Clone> {
pub text: T,
pub truncated_after_lines: Option<u32>,
}
#[derive(Debug)]
struct RawExecToolCallOutput {
pub exit_status: ExitStatus,
@@ -285,7 +337,7 @@ fn append_all(dst: &mut Vec<u8>, src: &[u8]) {
dst.extend_from_slice(src);
}
#[derive(Debug)]
#[derive(Clone, Debug)]
pub struct ExecToolCallOutput {
pub exit_code: i32,
pub stdout: StreamOutput<String>,
@@ -302,7 +354,11 @@ async fn exec(
) -> Result<RawExecToolCallOutput> {
let timeout = params.timeout_duration();
let ExecParams {
command, cwd, env, ..
command,
cwd,
env,
arg0,
..
} = params;
let (program, args) = command.split_first().ok_or_else(|| {
@@ -311,11 +367,11 @@ async fn exec(
"command args are empty",
))
})?;
let arg0 = None;
let arg0_ref = arg0.as_deref();
let child = spawn_child_async(
PathBuf::from(program),
args.into(),
arg0,
arg0_ref,
cwd,
sandbox_policy,
StdioPolicy::RedirectForShellTool,

View File

@@ -1,57 +0,0 @@
use serde::Deserialize;
use serde::Serialize;
use crate::exec_command::session_id::SessionId;
#[derive(Debug, Clone, Deserialize)]
pub struct ExecCommandParams {
pub(crate) cmd: String,
#[serde(default = "default_yield_time")]
pub(crate) yield_time_ms: u64,
#[serde(default = "max_output_tokens")]
pub(crate) max_output_tokens: u64,
#[serde(default = "default_shell")]
pub(crate) shell: String,
#[serde(default = "default_login")]
pub(crate) login: bool,
}
fn default_yield_time() -> u64 {
10_000
}
fn max_output_tokens() -> u64 {
10_000
}
fn default_login() -> bool {
true
}
fn default_shell() -> String {
"/bin/bash".to_string()
}
#[derive(Debug, Deserialize, Serialize)]
pub struct WriteStdinParams {
pub(crate) session_id: SessionId,
pub(crate) chars: String,
#[serde(default = "write_stdin_default_yield_time_ms")]
pub(crate) yield_time_ms: u64,
#[serde(default = "write_stdin_default_max_output_tokens")]
pub(crate) max_output_tokens: u64,
}
fn write_stdin_default_yield_time_ms() -> u64 {
250
}
fn write_stdin_default_max_output_tokens() -> u64 {
10_000
}

View File

@@ -1,96 +0,0 @@
use std::sync::Mutex as StdMutex;
use tokio::sync::broadcast;
use tokio::sync::mpsc;
use tokio::task::JoinHandle;
#[derive(Debug)]
pub(crate) struct ExecCommandSession {
/// Queue for writing bytes to the process stdin (PTY master write side).
writer_tx: mpsc::Sender<Vec<u8>>,
/// Broadcast stream of output chunks read from the PTY. New subscribers
/// receive only chunks emitted after they subscribe.
output_tx: broadcast::Sender<Vec<u8>>,
/// Child killer handle for termination on drop (can signal independently
/// of a thread blocked in `.wait()`).
killer: StdMutex<Option<Box<dyn portable_pty::ChildKiller + Send + Sync>>>,
/// JoinHandle for the blocking PTY reader task.
reader_handle: StdMutex<Option<JoinHandle<()>>>,
/// JoinHandle for the stdin writer task.
writer_handle: StdMutex<Option<JoinHandle<()>>>,
/// JoinHandle for the child wait task.
wait_handle: StdMutex<Option<JoinHandle<()>>>,
/// Tracks whether the underlying process has exited.
exit_status: std::sync::Arc<std::sync::atomic::AtomicBool>,
}
impl ExecCommandSession {
pub(crate) fn new(
writer_tx: mpsc::Sender<Vec<u8>>,
output_tx: broadcast::Sender<Vec<u8>>,
killer: Box<dyn portable_pty::ChildKiller + Send + Sync>,
reader_handle: JoinHandle<()>,
writer_handle: JoinHandle<()>,
wait_handle: JoinHandle<()>,
exit_status: std::sync::Arc<std::sync::atomic::AtomicBool>,
) -> (Self, broadcast::Receiver<Vec<u8>>) {
let initial_output_rx = output_tx.subscribe();
(
Self {
writer_tx,
output_tx,
killer: StdMutex::new(Some(killer)),
reader_handle: StdMutex::new(Some(reader_handle)),
writer_handle: StdMutex::new(Some(writer_handle)),
wait_handle: StdMutex::new(Some(wait_handle)),
exit_status,
},
initial_output_rx,
)
}
pub(crate) fn writer_sender(&self) -> mpsc::Sender<Vec<u8>> {
self.writer_tx.clone()
}
pub(crate) fn output_receiver(&self) -> broadcast::Receiver<Vec<u8>> {
self.output_tx.subscribe()
}
pub(crate) fn has_exited(&self) -> bool {
self.exit_status.load(std::sync::atomic::Ordering::SeqCst)
}
}
impl Drop for ExecCommandSession {
fn drop(&mut self) {
// Best-effort: terminate child first so blocking tasks can complete.
if let Ok(mut killer_opt) = self.killer.lock()
&& let Some(mut killer) = killer_opt.take()
{
let _ = killer.kill();
}
// Abort background tasks; they may already have exited after kill.
if let Ok(mut h) = self.reader_handle.lock()
&& let Some(handle) = h.take()
{
handle.abort();
}
if let Ok(mut h) = self.writer_handle.lock()
&& let Some(handle) = h.take()
{
handle.abort();
}
if let Ok(mut h) = self.wait_handle.lock()
&& let Some(handle) = h.take()
{
handle.abort();
}
}
}

View File

@@ -1,14 +0,0 @@
mod exec_command_params;
mod exec_command_session;
mod responses_api;
mod session_id;
mod session_manager;
pub use exec_command_params::ExecCommandParams;
pub use exec_command_params::WriteStdinParams;
pub(crate) use exec_command_session::ExecCommandSession;
pub use responses_api::EXEC_COMMAND_TOOL_NAME;
pub use responses_api::WRITE_STDIN_TOOL_NAME;
pub use responses_api::create_exec_command_tool_for_responses_api;
pub use responses_api::create_write_stdin_tool_for_responses_api;
pub use session_manager::SessionManager as ExecSessionManager;

View File

@@ -1,98 +0,0 @@
use std::collections::BTreeMap;
use crate::client_common::tools::ResponsesApiTool;
use crate::openai_tools::JsonSchema;
pub const EXEC_COMMAND_TOOL_NAME: &str = "exec_command";
pub const WRITE_STDIN_TOOL_NAME: &str = "write_stdin";
pub fn create_exec_command_tool_for_responses_api() -> ResponsesApiTool {
let mut properties = BTreeMap::<String, JsonSchema>::new();
properties.insert(
"cmd".to_string(),
JsonSchema::String {
description: Some("The shell command to execute.".to_string()),
},
);
properties.insert(
"yield_time_ms".to_string(),
JsonSchema::Number {
description: Some("The maximum time in milliseconds to wait for output.".to_string()),
},
);
properties.insert(
"max_output_tokens".to_string(),
JsonSchema::Number {
description: Some("The maximum number of tokens to output.".to_string()),
},
);
properties.insert(
"shell".to_string(),
JsonSchema::String {
description: Some("The shell to use. Defaults to \"/bin/bash\".".to_string()),
},
);
properties.insert(
"login".to_string(),
JsonSchema::Boolean {
description: Some(
"Whether to run the command as a login shell. Defaults to true.".to_string(),
),
},
);
ResponsesApiTool {
name: EXEC_COMMAND_TOOL_NAME.to_owned(),
description: r#"Execute shell commands on the local machine with streaming output."#
.to_string(),
strict: false,
parameters: JsonSchema::Object {
properties,
required: Some(vec!["cmd".to_string()]),
additional_properties: Some(false.into()),
},
}
}
pub fn create_write_stdin_tool_for_responses_api() -> ResponsesApiTool {
let mut properties = BTreeMap::<String, JsonSchema>::new();
properties.insert(
"session_id".to_string(),
JsonSchema::Number {
description: Some("The ID of the exec_command session.".to_string()),
},
);
properties.insert(
"chars".to_string(),
JsonSchema::String {
description: Some("The characters to write to stdin.".to_string()),
},
);
properties.insert(
"yield_time_ms".to_string(),
JsonSchema::Number {
description: Some(
"The maximum time in milliseconds to wait for output after writing.".to_string(),
),
},
);
properties.insert(
"max_output_tokens".to_string(),
JsonSchema::Number {
description: Some("The maximum number of tokens to output.".to_string()),
},
);
ResponsesApiTool {
name: WRITE_STDIN_TOOL_NAME.to_owned(),
description: r#"Write characters to an exec session's stdin. Returns all stdout+stderr received within yield_time_ms.
Can write control characters (\u0003 for Ctrl-C), or an empty string to just poll stdout+stderr."#
.to_string(),
strict: false,
parameters: JsonSchema::Object {
properties,
required: Some(vec!["session_id".to_string(), "chars".to_string()]),
additional_properties: Some(false.into()),
},
}
}

View File

@@ -1,5 +0,0 @@
use serde::Deserialize;
use serde::Serialize;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub(crate) struct SessionId(pub u32);

View File

@@ -1,506 +0,0 @@
use std::collections::HashMap;
use std::io::ErrorKind;
use std::io::Read;
use std::sync::Arc;
use std::sync::Mutex as StdMutex;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::AtomicU32;
use portable_pty::CommandBuilder;
use portable_pty::PtySize;
use portable_pty::native_pty_system;
use tokio::sync::Mutex;
use tokio::sync::mpsc;
use tokio::sync::oneshot;
use tokio::time::Duration;
use tokio::time::Instant;
use tokio::time::timeout;
use crate::exec_command::exec_command_params::ExecCommandParams;
use crate::exec_command::exec_command_params::WriteStdinParams;
use crate::exec_command::exec_command_session::ExecCommandSession;
use crate::exec_command::session_id::SessionId;
use crate::truncate::truncate_middle;
#[derive(Debug, Default)]
pub struct SessionManager {
next_session_id: AtomicU32,
sessions: Mutex<HashMap<SessionId, ExecCommandSession>>,
}
#[derive(Debug)]
pub struct ExecCommandOutput {
wall_time: Duration,
exit_status: ExitStatus,
original_token_count: Option<u64>,
output: String,
}
impl ExecCommandOutput {
pub(crate) fn to_text_output(&self) -> String {
let wall_time_secs = self.wall_time.as_secs_f32();
let termination_status = match self.exit_status {
ExitStatus::Exited(code) => format!("Process exited with code {code}"),
ExitStatus::Ongoing(session_id) => {
format!("Process running with session ID {}", session_id.0)
}
};
let truncation_status = match self.original_token_count {
Some(tokens) => {
format!("\nWarning: truncated output (original token count: {tokens})")
}
None => "".to_string(),
};
format!(
r#"Wall time: {wall_time_secs:.3} seconds
{termination_status}{truncation_status}
Output:
{output}"#,
output = self.output
)
}
}
#[derive(Debug)]
pub enum ExitStatus {
Exited(i32),
Ongoing(SessionId),
}
impl SessionManager {
/// Processes the request and is required to send a response via `outgoing`.
pub async fn handle_exec_command_request(
&self,
params: ExecCommandParams,
) -> Result<ExecCommandOutput, String> {
// Allocate a session id.
let session_id = SessionId(
self.next_session_id
.fetch_add(1, std::sync::atomic::Ordering::SeqCst),
);
let (session, mut output_rx, mut exit_rx) = create_exec_command_session(params.clone())
.await
.map_err(|err| {
format!(
"failed to create exec command session for session id {}: {err}",
session_id.0
)
})?;
// Insert into session map.
self.sessions.lock().await.insert(session_id, session);
// Collect output until either timeout expires or process exits.
// Do not cap during collection; truncate at the end if needed.
// Use a modest initial capacity to avoid large preallocation.
let cap_bytes_u64 = params.max_output_tokens.saturating_mul(4);
let cap_bytes: usize = cap_bytes_u64.min(usize::MAX as u64) as usize;
let mut collected: Vec<u8> = Vec::with_capacity(4096);
let start_time = Instant::now();
let deadline = start_time + Duration::from_millis(params.yield_time_ms);
let mut exit_code: Option<i32> = None;
loop {
if Instant::now() >= deadline {
break;
}
let remaining = deadline.saturating_duration_since(Instant::now());
tokio::select! {
biased;
exit = &mut exit_rx => {
exit_code = exit.ok();
// Small grace period to pull remaining buffered output
let grace_deadline = Instant::now() + Duration::from_millis(25);
while Instant::now() < grace_deadline {
match timeout(Duration::from_millis(1), output_rx.recv()).await {
Ok(Ok(chunk)) => {
collected.extend_from_slice(&chunk);
}
Ok(Err(tokio::sync::broadcast::error::RecvError::Lagged(_))) => {
// Skip missed messages; keep trying within grace period.
continue;
}
Ok(Err(tokio::sync::broadcast::error::RecvError::Closed)) => break,
Err(_) => break,
}
}
break;
}
chunk = timeout(remaining, output_rx.recv()) => {
match chunk {
Ok(Ok(chunk)) => {
collected.extend_from_slice(&chunk);
}
Ok(Err(tokio::sync::broadcast::error::RecvError::Lagged(_))) => {
// Skip missed messages; continue collecting fresh output.
}
Ok(Err(tokio::sync::broadcast::error::RecvError::Closed)) => { break; }
Err(_) => { break; }
}
}
}
}
let output = String::from_utf8_lossy(&collected).to_string();
let exit_status = if let Some(code) = exit_code {
ExitStatus::Exited(code)
} else {
ExitStatus::Ongoing(session_id)
};
// If output exceeds cap, truncate the middle and record original token estimate.
let (output, original_token_count) = truncate_middle(&output, cap_bytes);
Ok(ExecCommandOutput {
wall_time: Instant::now().duration_since(start_time),
exit_status,
original_token_count,
output,
})
}
/// Write characters to a session's stdin and collect combined output for up to `yield_time_ms`.
pub async fn handle_write_stdin_request(
&self,
params: WriteStdinParams,
) -> Result<ExecCommandOutput, String> {
let WriteStdinParams {
session_id,
chars,
yield_time_ms,
max_output_tokens,
} = params;
// Grab handles without holding the sessions lock across await points.
let (writer_tx, mut output_rx) = {
let sessions = self.sessions.lock().await;
match sessions.get(&session_id) {
Some(session) => (session.writer_sender(), session.output_receiver()),
None => {
return Err(format!("unknown session id {}", session_id.0));
}
}
};
// Write stdin if provided.
if !chars.is_empty() && writer_tx.send(chars.into_bytes()).await.is_err() {
return Err("failed to write to stdin".to_string());
}
// Collect output up to yield_time_ms, truncating to max_output_tokens bytes.
let mut collected: Vec<u8> = Vec::with_capacity(4096);
let start_time = Instant::now();
let deadline = start_time + Duration::from_millis(yield_time_ms);
loop {
let now = Instant::now();
if now >= deadline {
break;
}
let remaining = deadline - now;
match timeout(remaining, output_rx.recv()).await {
Ok(Ok(chunk)) => {
// Collect all output within the time budget; truncate at the end.
collected.extend_from_slice(&chunk);
}
Ok(Err(tokio::sync::broadcast::error::RecvError::Lagged(_))) => {
// Skip missed messages; continue collecting fresh output.
}
Ok(Err(tokio::sync::broadcast::error::RecvError::Closed)) => break,
Err(_) => break, // timeout
}
}
// Return structured output, truncating middle if over cap.
let output = String::from_utf8_lossy(&collected).to_string();
let cap_bytes_u64 = max_output_tokens.saturating_mul(4);
let cap_bytes: usize = cap_bytes_u64.min(usize::MAX as u64) as usize;
let (output, original_token_count) = truncate_middle(&output, cap_bytes);
Ok(ExecCommandOutput {
wall_time: Instant::now().duration_since(start_time),
exit_status: ExitStatus::Ongoing(session_id),
original_token_count,
output,
})
}
}
/// Spawn PTY and child process per spawn_exec_command_session logic.
async fn create_exec_command_session(
params: ExecCommandParams,
) -> anyhow::Result<(
ExecCommandSession,
tokio::sync::broadcast::Receiver<Vec<u8>>,
oneshot::Receiver<i32>,
)> {
let ExecCommandParams {
cmd,
yield_time_ms: _,
max_output_tokens: _,
shell,
login,
} = params;
// Use the native pty implementation for the system
let pty_system = native_pty_system();
// Create a new pty
let pair = pty_system.openpty(PtySize {
rows: 24,
cols: 80,
pixel_width: 0,
pixel_height: 0,
})?;
// Spawn a shell into the pty
let mut command_builder = CommandBuilder::new(shell);
let shell_mode_opt = if login { "-lc" } else { "-c" };
command_builder.arg(shell_mode_opt);
command_builder.arg(cmd);
let mut child = pair.slave.spawn_command(command_builder)?;
// Obtain a killer that can signal the process independently of `.wait()`.
let killer = child.clone_killer();
// Channel to forward write requests to the PTY writer.
let (writer_tx, mut writer_rx) = mpsc::channel::<Vec<u8>>(128);
// Broadcast for streaming PTY output to readers: subscribers receive from subscription time.
let (output_tx, _) = tokio::sync::broadcast::channel::<Vec<u8>>(256);
// Reader task: drain PTY and forward chunks to output channel.
let mut reader = pair.master.try_clone_reader()?;
let output_tx_clone = output_tx.clone();
let reader_handle = tokio::task::spawn_blocking(move || {
let mut buf = [0u8; 8192];
loop {
match reader.read(&mut buf) {
Ok(0) => break, // EOF
Ok(n) => {
// Forward to broadcast; best-effort if there are subscribers.
let _ = output_tx_clone.send(buf[..n].to_vec());
}
Err(ref e) if e.kind() == ErrorKind::Interrupted => {
// Retry on EINTR
continue;
}
Err(ref e) if e.kind() == ErrorKind::WouldBlock => {
// We're in a blocking thread; back off briefly and retry.
std::thread::sleep(Duration::from_millis(5));
continue;
}
Err(_) => break,
}
}
});
// Writer task: apply stdin writes to the PTY writer.
let writer = pair.master.take_writer()?;
let writer = Arc::new(StdMutex::new(writer));
let writer_handle = tokio::spawn({
let writer = writer.clone();
async move {
while let Some(bytes) = writer_rx.recv().await {
let writer = writer.clone();
// Perform blocking write on a blocking thread.
let _ = tokio::task::spawn_blocking(move || {
if let Ok(mut guard) = writer.lock() {
use std::io::Write;
let _ = guard.write_all(&bytes);
let _ = guard.flush();
}
})
.await;
}
}
});
// Keep the child alive until it exits, then signal exit code.
let (exit_tx, exit_rx) = oneshot::channel::<i32>();
let exit_status = Arc::new(AtomicBool::new(false));
let wait_exit_status = exit_status.clone();
let wait_handle = tokio::task::spawn_blocking(move || {
let code = match child.wait() {
Ok(status) => status.exit_code() as i32,
Err(_) => -1,
};
wait_exit_status.store(true, std::sync::atomic::Ordering::SeqCst);
let _ = exit_tx.send(code);
});
// Create and store the session with channels.
let (session, initial_output_rx) = ExecCommandSession::new(
writer_tx,
output_tx,
killer,
reader_handle,
writer_handle,
wait_handle,
exit_status,
);
Ok((session, initial_output_rx, exit_rx))
}
#[cfg(test)]
mod tests {
use super::*;
use crate::exec_command::session_id::SessionId;
/// Test that verifies that [`SessionManager::handle_exec_command_request()`]
/// and [`SessionManager::handle_write_stdin_request()`] work as expected
/// in the presence of a process that never terminates (but produces
/// output continuously).
#[cfg(unix)]
#[allow(clippy::print_stderr)]
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
async fn session_manager_streams_and_truncates_from_now() {
use crate::exec_command::exec_command_params::ExecCommandParams;
use crate::exec_command::exec_command_params::WriteStdinParams;
use tokio::time::sleep;
let session_manager = SessionManager::default();
// Long-running loop that prints an increasing counter every ~100ms.
// Use Python for a portable, reliable sleep across shells/PTYs.
let cmd = r#"python3 - <<'PY'
import sys, time
count = 0
while True:
print(count)
sys.stdout.flush()
count += 100
time.sleep(0.1)
PY"#
.to_string();
// Start the session and collect ~3s of output.
let params = ExecCommandParams {
cmd,
yield_time_ms: 3_000,
max_output_tokens: 1_000, // large enough to avoid truncation here
shell: "/bin/bash".to_string(),
login: false,
};
let initial_output = match session_manager
.handle_exec_command_request(params.clone())
.await
{
Ok(v) => v,
Err(e) => {
// PTY may be restricted in some sandboxes; skip in that case.
if e.contains("openpty") || e.contains("Operation not permitted") {
eprintln!("skipping test due to restricted PTY: {e}");
return;
}
panic!("exec request failed unexpectedly: {e}");
}
};
eprintln!("initial output: {initial_output:?}");
// Should be ongoing (we launched a never-ending loop).
let session_id = match initial_output.exit_status {
ExitStatus::Ongoing(id) => id,
_ => panic!("expected ongoing session"),
};
// Parse the numeric lines and get the max observed value in the first window.
let first_nums = extract_monotonic_numbers(&initial_output.output);
assert!(
!first_nums.is_empty(),
"expected some output from first window"
);
let first_max = *first_nums.iter().max().unwrap();
// Wait ~4s so counters progress while we're not reading.
sleep(Duration::from_millis(4_000)).await;
// Now read ~3s of output "from now" only.
// Use a small token cap so truncation occurs and we test middle truncation.
let write_params = WriteStdinParams {
session_id,
chars: String::new(),
yield_time_ms: 3_000,
max_output_tokens: 16, // 16 tokens ~= 64 bytes -> likely truncation
};
let second = session_manager
.handle_write_stdin_request(write_params)
.await
.expect("write stdin should succeed");
// Verify truncation metadata and size bound (cap is tokens*4 bytes).
assert!(second.original_token_count.is_some());
let cap_bytes = (16u64 * 4) as usize;
assert!(second.output.len() <= cap_bytes);
// New middle marker should be present.
assert!(
second.output.contains("tokens truncated") && second.output.contains('…'),
"expected truncation marker in output, got: {}",
second.output
);
// Minimal freshness check: the earliest number we see in the second window
// should be significantly larger than the last from the first window.
let second_nums = extract_monotonic_numbers(&second.output);
assert!(
!second_nums.is_empty(),
"expected some numeric output from second window"
);
let second_min = *second_nums.iter().min().unwrap();
// We slept 4 seconds (~40 ticks at 100ms/tick, each +100), so expect
// an increase of roughly 4000 or more. Allow a generous margin.
assert!(
second_min >= first_max + 2000,
"second_min={second_min} first_max={first_max}",
);
}
#[cfg(unix)]
fn extract_monotonic_numbers(s: &str) -> Vec<i64> {
s.lines()
.filter_map(|line| {
if !line.is_empty()
&& line.chars().all(|c| c.is_ascii_digit())
&& let Ok(n) = line.parse::<i64>()
{
// Our generator increments by 100; ignore spurious fragments.
if n % 100 == 0 {
return Some(n);
}
}
None
})
.collect()
}
#[test]
fn to_text_output_exited_no_truncation() {
let out = ExecCommandOutput {
wall_time: Duration::from_millis(1234),
exit_status: ExitStatus::Exited(0),
original_token_count: None,
output: "hello".to_string(),
};
let text = out.to_text_output();
let expected = r#"Wall time: 1.234 seconds
Process exited with code 0
Output:
hello"#;
assert_eq!(expected, text);
}
#[test]
fn to_text_output_ongoing_with_truncation() {
let out = ExecCommandOutput {
wall_time: Duration::from_millis(500),
exit_status: ExitStatus::Ongoing(SessionId(42)),
original_token_count: Some(1000),
output: "abc".to_string(),
};
let text = out.to_text_output();
let expected = r#"Wall time: 0.500 seconds
Process running with session ID 42
Warning: truncated output (original token count: 1000)
Output:
abc"#;
assert_eq!(expected, text);
}
}

View File

@@ -1,109 +0,0 @@
use std::collections::HashMap;
use std::env;
use async_trait::async_trait;
use crate::CODEX_APPLY_PATCH_ARG1;
use crate::apply_patch::ApplyPatchExec;
use crate::exec::ExecParams;
use crate::executor::ExecutorConfig;
use crate::function_tool::FunctionCallError;
pub(crate) enum ExecutionMode {
Shell,
ApplyPatch(ApplyPatchExec),
}
#[async_trait]
/// Backend-specific hooks that prepare and post-process execution requests for a
/// given [`ExecutionMode`].
pub(crate) trait ExecutionBackend: Send + Sync {
fn prepare(
&self,
params: ExecParams,
// Required for downcasting the apply_patch.
mode: &ExecutionMode,
config: &ExecutorConfig,
) -> Result<ExecParams, FunctionCallError>;
fn stream_stdout(&self, _mode: &ExecutionMode) -> bool {
true
}
}
static SHELL_BACKEND: ShellBackend = ShellBackend;
static APPLY_PATCH_BACKEND: ApplyPatchBackend = ApplyPatchBackend;
pub(crate) fn backend_for_mode(mode: &ExecutionMode) -> &'static dyn ExecutionBackend {
match mode {
ExecutionMode::Shell => &SHELL_BACKEND,
ExecutionMode::ApplyPatch(_) => &APPLY_PATCH_BACKEND,
}
}
struct ShellBackend;
#[async_trait]
impl ExecutionBackend for ShellBackend {
fn prepare(
&self,
params: ExecParams,
mode: &ExecutionMode,
_config: &ExecutorConfig,
) -> Result<ExecParams, FunctionCallError> {
match mode {
ExecutionMode::Shell => Ok(params),
_ => Err(FunctionCallError::RespondToModel(
"shell backend invoked with non-shell mode".to_string(),
)),
}
}
}
struct ApplyPatchBackend;
#[async_trait]
impl ExecutionBackend for ApplyPatchBackend {
fn prepare(
&self,
params: ExecParams,
mode: &ExecutionMode,
config: &ExecutorConfig,
) -> Result<ExecParams, FunctionCallError> {
match mode {
ExecutionMode::ApplyPatch(exec) => {
let path_to_codex = if let Some(exe_path) = &config.codex_exe {
exe_path.to_string_lossy().to_string()
} else {
env::current_exe()
.ok()
.map(|p| p.to_string_lossy().to_string())
.ok_or_else(|| {
FunctionCallError::RespondToModel(
"failed to determine path to codex executable".to_string(),
)
})?
};
let patch = exec.action.patch.clone();
Ok(ExecParams {
command: vec![path_to_codex, CODEX_APPLY_PATCH_ARG1.to_string(), patch],
cwd: exec.action.cwd.clone(),
timeout_ms: params.timeout_ms,
// Run apply_patch with a minimal environment for determinism and to
// avoid leaking host environment variables into the patch process.
env: HashMap::new(),
with_escalated_permissions: params.with_escalated_permissions,
justification: params.justification,
})
}
ExecutionMode::Shell => Err(FunctionCallError::RespondToModel(
"apply_patch backend invoked without patch context".to_string(),
)),
}
}
fn stream_stdout(&self, _mode: &ExecutionMode) -> bool {
false
}
}

View File

@@ -1,51 +0,0 @@
use std::collections::HashSet;
use std::sync::Arc;
use std::sync::Mutex;
#[derive(Clone, Debug, Default)]
/// Thread-safe store of user approvals so repeated commands can reuse
/// previously granted trust.
pub(crate) struct ApprovalCache {
inner: Arc<Mutex<HashSet<Vec<String>>>>,
}
impl ApprovalCache {
pub(crate) fn insert(&self, command: Vec<String>) {
if command.is_empty() {
return;
}
if let Ok(mut guard) = self.inner.lock() {
guard.insert(command);
}
}
pub(crate) fn snapshot(&self) -> HashSet<Vec<String>> {
self.inner.lock().map(|g| g.clone()).unwrap_or_default()
}
}
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
#[test]
fn insert_ignores_empty_and_dedupes() {
let cache = ApprovalCache::default();
// Empty should be ignored
cache.insert(vec![]);
assert!(cache.snapshot().is_empty());
// Insert a command and verify snapshot contains it
let cmd = vec!["foo".to_string(), "bar".to_string()];
cache.insert(cmd.clone());
let snap1 = cache.snapshot();
assert!(snap1.contains(&cmd));
// Reinserting should not create duplicates
cache.insert(cmd);
let snap2 = cache.snapshot();
assert_eq!(snap1, snap2);
}
}

View File

@@ -1,68 +0,0 @@
mod backends;
mod cache;
mod runner;
mod sandbox;
pub(crate) use backends::ExecutionMode;
pub(crate) use runner::ExecutionRequest;
pub(crate) use runner::Executor;
pub(crate) use runner::ExecutorConfig;
pub(crate) use runner::normalize_exec_result;
pub(crate) mod linkers {
use crate::exec::ExecParams;
use crate::exec::StdoutStream;
use crate::executor::backends::ExecutionMode;
use crate::executor::runner::ExecutionRequest;
use crate::tools::context::ExecCommandContext;
pub struct PreparedExec {
pub(crate) context: ExecCommandContext,
pub(crate) request: ExecutionRequest,
}
impl PreparedExec {
pub fn new(
context: ExecCommandContext,
params: ExecParams,
approval_command: Vec<String>,
mode: ExecutionMode,
stdout_stream: Option<StdoutStream>,
use_shell_profile: bool,
) -> Self {
let request = ExecutionRequest {
params,
approval_command,
mode,
stdout_stream,
use_shell_profile,
};
Self { context, request }
}
}
}
pub mod errors {
use crate::error::CodexErr;
use crate::function_tool::FunctionCallError;
use thiserror::Error;
#[derive(Debug, Error)]
pub enum ExecError {
#[error(transparent)]
Function(#[from] FunctionCallError),
#[error(transparent)]
Codex(#[from] CodexErr),
}
impl ExecError {
pub(crate) fn rejection(msg: impl Into<String>) -> Self {
FunctionCallError::RespondToModel(msg.into()).into()
}
pub(crate) fn denied(msg: impl Into<String>) -> Self {
FunctionCallError::Denied(msg.into()).into()
}
}
}

View File

@@ -1,433 +0,0 @@
use std::future::Future;
use std::path::PathBuf;
use std::sync::Arc;
use std::sync::RwLock;
use std::time::Duration;
use super::backends::ExecutionMode;
use super::backends::backend_for_mode;
use super::cache::ApprovalCache;
use crate::codex::Session;
use crate::error::CodexErr;
use crate::error::SandboxErr;
use crate::error::get_error_message_ui;
use crate::exec::ExecParams;
use crate::exec::ExecToolCallOutput;
use crate::exec::SandboxType;
use crate::exec::StdoutStream;
use crate::exec::StreamOutput;
use crate::exec::process_exec_tool_call;
use crate::executor::errors::ExecError;
use crate::executor::sandbox::select_sandbox;
use crate::function_tool::FunctionCallError;
use crate::protocol::AskForApproval;
use crate::protocol::ReviewDecision;
use crate::protocol::SandboxPolicy;
use crate::shell;
use crate::tools::context::ExecCommandContext;
use codex_otel::otel_event_manager::ToolDecisionSource;
#[derive(Clone, Debug)]
pub(crate) struct ExecutorConfig {
pub(crate) sandbox_policy: SandboxPolicy,
pub(crate) sandbox_cwd: PathBuf,
pub(crate) codex_exe: Option<PathBuf>,
}
impl ExecutorConfig {
pub(crate) fn new(
sandbox_policy: SandboxPolicy,
sandbox_cwd: PathBuf,
codex_exe: Option<PathBuf>,
) -> Self {
Self {
sandbox_policy,
sandbox_cwd,
codex_exe,
}
}
}
/// Coordinates sandbox selection, backend-specific preparation, and command
/// execution for tool calls requested by the model.
pub(crate) struct Executor {
approval_cache: ApprovalCache,
config: Arc<RwLock<ExecutorConfig>>,
}
impl Executor {
pub(crate) fn new(config: ExecutorConfig) -> Self {
Self {
approval_cache: ApprovalCache::default(),
config: Arc::new(RwLock::new(config)),
}
}
/// Updates the sandbox policy and working directory used for future
/// executions without recreating the executor.
pub(crate) fn update_environment(&self, sandbox_policy: SandboxPolicy, sandbox_cwd: PathBuf) {
if let Ok(mut cfg) = self.config.write() {
cfg.sandbox_policy = sandbox_policy;
cfg.sandbox_cwd = sandbox_cwd;
}
}
/// Runs a prepared execution request end-to-end: prepares parameters, decides on
/// sandbox placement (prompting the user when necessary), launches the command,
/// and lets the backend post-process the final output.
pub(crate) async fn run<F, Fut>(
&self,
mut request: ExecutionRequest,
session: &Session,
approval_policy: AskForApproval,
context: &ExecCommandContext,
on_exec_begin: F,
) -> Result<ExecToolCallOutput, ExecError>
where
F: FnOnce() -> Fut,
Fut: Future<Output = ()>,
{
if matches!(request.mode, ExecutionMode::Shell) {
request.params =
maybe_translate_shell_command(request.params, session, request.use_shell_profile);
}
// Step 1: Snapshot sandbox configuration so it stays stable for this run.
let config = self
.config
.read()
.map_err(|_| ExecError::rejection("executor config poisoned"))?
.clone();
// Step 2: Normalise parameters via the selected backend.
let backend = backend_for_mode(&request.mode);
let stdout_stream = if backend.stream_stdout(&request.mode) {
request.stdout_stream.clone()
} else {
None
};
request.params = backend
.prepare(request.params, &request.mode, &config)
.map_err(ExecError::from)?;
// Step 3: Decide sandbox placement, prompting for approval when needed.
let sandbox_decision = select_sandbox(
&request,
approval_policy,
self.approval_cache.snapshot(),
&config,
session,
&context.sub_id,
&context.call_id,
&context.otel_event_manager,
)
.await?;
if sandbox_decision.record_session_approval {
self.approval_cache.insert(request.approval_command.clone());
}
on_exec_begin().await;
// Step 4: Launch the command within the chosen sandbox.
let first_attempt = self
.spawn(
request.params.clone(),
sandbox_decision.initial_sandbox,
&config,
stdout_stream.clone(),
)
.await;
// Step 5: Handle sandbox outcomes, optionally escalating to an unsandboxed retry.
match first_attempt {
Ok(output) => Ok(output),
Err(CodexErr::Sandbox(SandboxErr::Timeout { output })) => {
Err(CodexErr::Sandbox(SandboxErr::Timeout { output }).into())
}
Err(CodexErr::Sandbox(error)) => {
if sandbox_decision.escalate_on_failure {
self.retry_without_sandbox(
&request,
&config,
session,
context,
stdout_stream,
error,
)
.await
} else {
let message = sandbox_failure_message(error);
Err(ExecError::rejection(message))
}
}
Err(err) => Err(err.into()),
}
}
/// Fallback path invoked when a sandboxed run is denied so the user can
/// approve rerunning without isolation.
async fn retry_without_sandbox(
&self,
request: &ExecutionRequest,
config: &ExecutorConfig,
session: &Session,
context: &ExecCommandContext,
stdout_stream: Option<StdoutStream>,
sandbox_error: SandboxErr,
) -> Result<ExecToolCallOutput, ExecError> {
session
.notify_background_event(
&context.sub_id,
format!("Execution failed: {sandbox_error}"),
)
.await;
let decision = session
.request_command_approval(
context.sub_id.to_string(),
context.call_id.to_string(),
request.approval_command.clone(),
request.params.cwd.clone(),
Some("command failed; retry without sandbox?".to_string()),
)
.await;
context.otel_event_manager.tool_decision(
&context.tool_name,
&context.call_id,
decision,
ToolDecisionSource::User,
);
match decision {
ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {
if matches!(decision, ReviewDecision::ApprovedForSession) {
self.approval_cache.insert(request.approval_command.clone());
}
session
.notify_background_event(&context.sub_id, "retrying command without sandbox")
.await;
let retry_output = self
.spawn(
request.params.clone(),
SandboxType::None,
config,
stdout_stream,
)
.await?;
Ok(retry_output)
}
ReviewDecision::Denied | ReviewDecision::Abort => {
Err(ExecError::denied("exec command rejected by user"))
}
}
}
async fn spawn(
&self,
params: ExecParams,
sandbox: SandboxType,
config: &ExecutorConfig,
stdout_stream: Option<StdoutStream>,
) -> Result<ExecToolCallOutput, CodexErr> {
process_exec_tool_call(
params,
sandbox,
&config.sandbox_policy,
&config.sandbox_cwd,
&config.codex_exe,
stdout_stream,
)
.await
}
}
fn maybe_translate_shell_command(
params: ExecParams,
session: &Session,
use_shell_profile: bool,
) -> ExecParams {
let should_translate =
matches!(session.user_shell(), shell::Shell::PowerShell(_)) || use_shell_profile;
if should_translate
&& let Some(command) = session
.user_shell()
.format_default_shell_invocation(params.command.clone())
{
return ExecParams { command, ..params };
}
params
}
fn sandbox_failure_message(error: SandboxErr) -> String {
let codex_error = CodexErr::Sandbox(error);
let friendly = get_error_message_ui(&codex_error);
format!("failed in sandbox: {friendly}")
}
pub(crate) struct ExecutionRequest {
pub params: ExecParams,
pub approval_command: Vec<String>,
pub mode: ExecutionMode,
pub stdout_stream: Option<StdoutStream>,
pub use_shell_profile: bool,
}
pub(crate) struct NormalizedExecOutput<'a> {
borrowed: Option<&'a ExecToolCallOutput>,
synthetic: Option<ExecToolCallOutput>,
}
impl<'a> NormalizedExecOutput<'a> {
pub(crate) fn event_output(&'a self) -> &'a ExecToolCallOutput {
match (self.borrowed, self.synthetic.as_ref()) {
(Some(output), _) => output,
(None, Some(output)) => output,
(None, None) => unreachable!("normalized exec output missing data"),
}
}
}
/// Converts a raw execution result into a uniform view that always exposes an
/// [`ExecToolCallOutput`], synthesizing error output when the command fails
/// before producing a response.
pub(crate) fn normalize_exec_result(
result: &Result<ExecToolCallOutput, ExecError>,
) -> NormalizedExecOutput<'_> {
match result {
Ok(output) => NormalizedExecOutput {
borrowed: Some(output),
synthetic: None,
},
Err(ExecError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output }))) => {
NormalizedExecOutput {
borrowed: Some(output.as_ref()),
synthetic: None,
}
}
Err(err) => {
let message = match err {
ExecError::Function(FunctionCallError::RespondToModel(msg))
| ExecError::Function(FunctionCallError::Denied(msg)) => msg.clone(),
ExecError::Codex(e) => get_error_message_ui(e),
err => err.to_string(),
};
let synthetic = ExecToolCallOutput {
exit_code: -1,
stdout: StreamOutput::new(String::new()),
stderr: StreamOutput::new(message.clone()),
aggregated_output: StreamOutput::new(message),
duration: Duration::default(),
timed_out: false,
};
NormalizedExecOutput {
borrowed: None,
synthetic: Some(synthetic),
}
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::error::CodexErr;
use crate::error::EnvVarError;
use crate::error::SandboxErr;
use crate::exec::StreamOutput;
use pretty_assertions::assert_eq;
fn make_output(text: &str) -> ExecToolCallOutput {
ExecToolCallOutput {
exit_code: 1,
stdout: StreamOutput::new(String::new()),
stderr: StreamOutput::new(String::new()),
aggregated_output: StreamOutput::new(text.to_string()),
duration: Duration::from_millis(123),
timed_out: false,
}
}
#[test]
fn normalize_success_borrows() {
let out = make_output("ok");
let result: Result<ExecToolCallOutput, ExecError> = Ok(out);
let normalized = normalize_exec_result(&result);
assert_eq!(normalized.event_output().aggregated_output.text, "ok");
}
#[test]
fn normalize_timeout_borrows_embedded_output() {
let out = make_output("timed out payload");
let err = CodexErr::Sandbox(SandboxErr::Timeout {
output: Box::new(out),
});
let result: Result<ExecToolCallOutput, ExecError> = Err(ExecError::Codex(err));
let normalized = normalize_exec_result(&result);
assert_eq!(
normalized.event_output().aggregated_output.text,
"timed out payload"
);
}
#[test]
fn sandbox_failure_message_uses_denied_stderr() {
let output = ExecToolCallOutput {
exit_code: 101,
stdout: StreamOutput::new(String::new()),
stderr: StreamOutput::new("sandbox stderr".to_string()),
aggregated_output: StreamOutput::new(String::new()),
duration: Duration::from_millis(10),
timed_out: false,
};
let err = SandboxErr::Denied {
output: Box::new(output),
};
let message = sandbox_failure_message(err);
assert_eq!(message, "failed in sandbox: sandbox stderr");
}
#[test]
fn sandbox_failure_message_falls_back_to_aggregated_output() {
let output = ExecToolCallOutput {
exit_code: 101,
stdout: StreamOutput::new(String::new()),
stderr: StreamOutput::new(String::new()),
aggregated_output: StreamOutput::new("aggregate text".to_string()),
duration: Duration::from_millis(10),
timed_out: false,
};
let err = SandboxErr::Denied {
output: Box::new(output),
};
let message = sandbox_failure_message(err);
assert_eq!(message, "failed in sandbox: aggregate text");
}
#[test]
fn normalize_function_error_synthesizes_payload() {
let err = FunctionCallError::RespondToModel("boom".to_string());
let result: Result<ExecToolCallOutput, ExecError> = Err(ExecError::Function(err));
let normalized = normalize_exec_result(&result);
assert_eq!(normalized.event_output().aggregated_output.text, "boom");
}
#[test]
fn normalize_codex_error_synthesizes_user_message() {
// Use a simple EnvVar error which formats to a clear message
let e = CodexErr::EnvVar(EnvVarError {
var: "FOO".to_string(),
instructions: Some("set it".to_string()),
});
let result: Result<ExecToolCallOutput, ExecError> = Err(ExecError::Codex(e));
let normalized = normalize_exec_result(&result);
assert!(
normalized
.event_output()
.aggregated_output
.text
.contains("Missing environment variable: `FOO`"),
"expected synthesized user-friendly message"
);
}
}

View File

@@ -1,405 +0,0 @@
use crate::apply_patch::ApplyPatchExec;
use crate::codex::Session;
use crate::exec::SandboxType;
use crate::executor::ExecutionMode;
use crate::executor::ExecutionRequest;
use crate::executor::ExecutorConfig;
use crate::executor::errors::ExecError;
use crate::safety::SafetyCheck;
use crate::safety::assess_command_safety;
use crate::safety::assess_patch_safety;
use codex_otel::otel_event_manager::OtelEventManager;
use codex_otel::otel_event_manager::ToolDecisionSource;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::ReviewDecision;
use std::collections::HashSet;
/// Sandbox placement options selected for an execution run, including whether
/// to escalate after failures and whether approvals should persist.
pub(crate) struct SandboxDecision {
pub(crate) initial_sandbox: SandboxType,
pub(crate) escalate_on_failure: bool,
pub(crate) record_session_approval: bool,
}
impl SandboxDecision {
fn auto(sandbox: SandboxType, escalate_on_failure: bool) -> Self {
Self {
initial_sandbox: sandbox,
escalate_on_failure,
record_session_approval: false,
}
}
fn user_override(record_session_approval: bool) -> Self {
Self {
initial_sandbox: SandboxType::None,
escalate_on_failure: false,
record_session_approval,
}
}
}
fn should_escalate_on_failure(approval: AskForApproval, sandbox: SandboxType) -> bool {
matches!(
(approval, sandbox),
(
AskForApproval::UnlessTrusted | AskForApproval::OnFailure,
SandboxType::MacosSeatbelt | SandboxType::LinuxSeccomp
)
)
}
/// Determines how a command should be sandboxed, prompting the user when
/// policy requires explicit approval.
#[allow(clippy::too_many_arguments)]
pub async fn select_sandbox(
request: &ExecutionRequest,
approval_policy: AskForApproval,
approval_cache: HashSet<Vec<String>>,
config: &ExecutorConfig,
session: &Session,
sub_id: &str,
call_id: &str,
otel_event_manager: &OtelEventManager,
) -> Result<SandboxDecision, ExecError> {
match &request.mode {
ExecutionMode::Shell => {
select_shell_sandbox(
request,
approval_policy,
approval_cache,
config,
session,
sub_id,
call_id,
otel_event_manager,
)
.await
}
ExecutionMode::ApplyPatch(exec) => {
select_apply_patch_sandbox(exec, approval_policy, config)
}
}
}
#[allow(clippy::too_many_arguments)]
async fn select_shell_sandbox(
request: &ExecutionRequest,
approval_policy: AskForApproval,
approved_snapshot: HashSet<Vec<String>>,
config: &ExecutorConfig,
session: &Session,
sub_id: &str,
call_id: &str,
otel_event_manager: &OtelEventManager,
) -> Result<SandboxDecision, ExecError> {
let command_for_safety = if request.approval_command.is_empty() {
request.params.command.clone()
} else {
request.approval_command.clone()
};
let safety = assess_command_safety(
&command_for_safety,
approval_policy,
&config.sandbox_policy,
&approved_snapshot,
request.params.with_escalated_permissions.unwrap_or(false),
);
match safety {
SafetyCheck::AutoApprove {
sandbox_type,
user_explicitly_approved,
} => {
let mut decision = SandboxDecision::auto(
sandbox_type,
should_escalate_on_failure(approval_policy, sandbox_type),
);
if user_explicitly_approved {
decision.record_session_approval = true;
}
let (decision_for_event, source) = if user_explicitly_approved {
(ReviewDecision::ApprovedForSession, ToolDecisionSource::User)
} else {
(ReviewDecision::Approved, ToolDecisionSource::Config)
};
otel_event_manager.tool_decision("local_shell", call_id, decision_for_event, source);
Ok(decision)
}
SafetyCheck::AskUser => {
let decision = session
.request_command_approval(
sub_id.to_string(),
call_id.to_string(),
request.approval_command.clone(),
request.params.cwd.clone(),
request.params.justification.clone(),
)
.await;
otel_event_manager.tool_decision(
"local_shell",
call_id,
decision,
ToolDecisionSource::User,
);
match decision {
ReviewDecision::Approved => Ok(SandboxDecision::user_override(false)),
ReviewDecision::ApprovedForSession => Ok(SandboxDecision::user_override(true)),
ReviewDecision::Denied | ReviewDecision::Abort => {
Err(ExecError::denied("exec command rejected by user"))
}
}
}
SafetyCheck::Reject { reason } => Err(ExecError::rejection(format!(
"exec command rejected: {reason}"
))),
}
}
fn select_apply_patch_sandbox(
exec: &ApplyPatchExec,
approval_policy: AskForApproval,
config: &ExecutorConfig,
) -> Result<SandboxDecision, ExecError> {
if exec.user_explicitly_approved_this_action {
return Ok(SandboxDecision::user_override(false));
}
match assess_patch_safety(
&exec.action,
approval_policy,
&config.sandbox_policy,
&config.sandbox_cwd,
) {
SafetyCheck::AutoApprove { sandbox_type, .. } => Ok(SandboxDecision::auto(
sandbox_type,
should_escalate_on_failure(approval_policy, sandbox_type),
)),
SafetyCheck::AskUser => Err(ExecError::rejection(
"patch requires approval but none was recorded",
)),
SafetyCheck::Reject { reason } => {
Err(ExecError::rejection(format!("patch rejected: {reason}")))
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::codex::make_session_and_context;
use crate::exec::ExecParams;
use crate::function_tool::FunctionCallError;
use crate::protocol::SandboxPolicy;
use codex_apply_patch::ApplyPatchAction;
use pretty_assertions::assert_eq;
#[tokio::test]
async fn select_apply_patch_user_override_when_explicit() {
let (session, ctx) = make_session_and_context();
let tmp = tempfile::tempdir().expect("tmp");
let p = tmp.path().join("a.txt");
let action = ApplyPatchAction::new_add_for_test(&p, "hello".to_string());
let exec = ApplyPatchExec {
action,
user_explicitly_approved_this_action: true,
};
let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None);
let request = ExecutionRequest {
params: ExecParams {
command: vec!["apply_patch".into()],
cwd: std::env::temp_dir(),
timeout_ms: None,
env: std::collections::HashMap::new(),
with_escalated_permissions: None,
justification: None,
},
approval_command: vec!["apply_patch".into()],
mode: ExecutionMode::ApplyPatch(exec),
stdout_stream: None,
use_shell_profile: false,
};
let otel_event_manager = ctx.client.get_otel_event_manager();
let decision = select_sandbox(
&request,
AskForApproval::OnRequest,
Default::default(),
&cfg,
&session,
"sub",
"call",
&otel_event_manager,
)
.await
.expect("ok");
// Explicit user override runs without sandbox
assert_eq!(decision.initial_sandbox, SandboxType::None);
assert_eq!(decision.escalate_on_failure, false);
}
#[tokio::test]
async fn select_apply_patch_autoapprove_in_danger() {
let (session, ctx) = make_session_and_context();
let tmp = tempfile::tempdir().expect("tmp");
let p = tmp.path().join("a.txt");
let action = ApplyPatchAction::new_add_for_test(&p, "hello".to_string());
let exec = ApplyPatchExec {
action,
user_explicitly_approved_this_action: false,
};
let cfg = ExecutorConfig::new(SandboxPolicy::DangerFullAccess, std::env::temp_dir(), None);
let request = ExecutionRequest {
params: ExecParams {
command: vec!["apply_patch".into()],
cwd: std::env::temp_dir(),
timeout_ms: None,
env: std::collections::HashMap::new(),
with_escalated_permissions: None,
justification: None,
},
approval_command: vec!["apply_patch".into()],
mode: ExecutionMode::ApplyPatch(exec),
stdout_stream: None,
use_shell_profile: false,
};
let otel_event_manager = ctx.client.get_otel_event_manager();
let decision = select_sandbox(
&request,
AskForApproval::OnRequest,
Default::default(),
&cfg,
&session,
"sub",
"call",
&otel_event_manager,
)
.await
.expect("ok");
// On platforms with a sandbox, DangerFullAccess still prefers it
let expected = crate::safety::get_platform_sandbox().unwrap_or(SandboxType::None);
assert_eq!(decision.initial_sandbox, expected);
assert_eq!(decision.escalate_on_failure, false);
}
#[tokio::test]
async fn select_apply_patch_requires_approval_on_unless_trusted() {
let (session, ctx) = make_session_and_context();
let tempdir = tempfile::tempdir().expect("tmpdir");
let p = tempdir.path().join("a.txt");
let action = ApplyPatchAction::new_add_for_test(&p, "hello".to_string());
let exec = ApplyPatchExec {
action,
user_explicitly_approved_this_action: false,
};
let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None);
let request = ExecutionRequest {
params: ExecParams {
command: vec!["apply_patch".into()],
cwd: std::env::temp_dir(),
timeout_ms: None,
env: std::collections::HashMap::new(),
with_escalated_permissions: None,
justification: None,
},
approval_command: vec!["apply_patch".into()],
mode: ExecutionMode::ApplyPatch(exec),
stdout_stream: None,
use_shell_profile: false,
};
let otel_event_manager = ctx.client.get_otel_event_manager();
let result = select_sandbox(
&request,
AskForApproval::UnlessTrusted,
Default::default(),
&cfg,
&session,
"sub",
"call",
&otel_event_manager,
)
.await;
match result {
Ok(_) => panic!("expected error"),
Err(ExecError::Function(FunctionCallError::RespondToModel(msg))) => {
assert!(msg.contains("requires approval"))
}
Err(other) => panic!("unexpected error: {other:?}"),
}
}
#[tokio::test]
async fn select_shell_autoapprove_in_danger_mode() {
let (session, ctx) = make_session_and_context();
let cfg = ExecutorConfig::new(SandboxPolicy::DangerFullAccess, std::env::temp_dir(), None);
let request = ExecutionRequest {
params: ExecParams {
command: vec!["some-unknown".into()],
cwd: std::env::temp_dir(),
timeout_ms: None,
env: std::collections::HashMap::new(),
with_escalated_permissions: None,
justification: None,
},
approval_command: vec!["some-unknown".into()],
mode: ExecutionMode::Shell,
stdout_stream: None,
use_shell_profile: false,
};
let otel_event_manager = ctx.client.get_otel_event_manager();
let decision = select_sandbox(
&request,
AskForApproval::OnRequest,
Default::default(),
&cfg,
&session,
"sub",
"call",
&otel_event_manager,
)
.await
.expect("ok");
assert_eq!(decision.initial_sandbox, SandboxType::None);
assert_eq!(decision.escalate_on_failure, false);
}
#[cfg(any(target_os = "macos", target_os = "linux"))]
#[tokio::test]
async fn select_shell_escalates_on_failure_with_platform_sandbox() {
let (session, ctx) = make_session_and_context();
let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None);
let request = ExecutionRequest {
params: ExecParams {
// Unknown command => untrusted but not flagged dangerous
command: vec!["some-unknown".into()],
cwd: std::env::temp_dir(),
timeout_ms: None,
env: std::collections::HashMap::new(),
with_escalated_permissions: None,
justification: None,
},
approval_command: vec!["some-unknown".into()],
mode: ExecutionMode::Shell,
stdout_stream: None,
use_shell_profile: false,
};
let otel_event_manager = ctx.client.get_otel_event_manager();
let decision = select_sandbox(
&request,
AskForApproval::OnFailure,
Default::default(),
&cfg,
&session,
"sub",
"call",
&otel_event_manager,
)
.await
.expect("ok");
// On macOS/Linux we should have a platform sandbox and escalate on failure
assert_ne!(decision.initial_sandbox, SandboxType::None);
assert_eq!(decision.escalate_on_failure, true);
}
}

View File

@@ -5,6 +5,7 @@ pub enum FunctionCallError {
#[error("{0}")]
RespondToModel(String),
#[error("{0}")]
#[allow(dead_code)] // TODO(jif) fix in a follow-up PR
Denied(String),
#[error("LocalShellCall without call_id or id")]
MissingLocalShellCallId,

View File

@@ -40,7 +40,7 @@ where
}
/// Converts the sandbox policy into the CLI invocation for `codex-linux-sandbox`.
fn create_linux_sandbox_command_args(
pub(crate) fn create_linux_sandbox_command_args(
command: Vec<String>,
sandbox_policy: &SandboxPolicy,
sandbox_policy_cwd: &Path,
@@ -56,7 +56,9 @@ fn create_linux_sandbox_command_args(
serde_json::to_string(sandbox_policy).expect("Failed to serialize SandboxPolicy to JSON");
let mut linux_cmd: Vec<String> = vec![
"--sandbox-policy-cwd".to_string(),
sandbox_policy_cwd,
"--sandbox-policy".to_string(),
sandbox_policy_json,
// Separator so that command arguments starting with `-` are not parsed as
// options of the helper itself.

View File

@@ -25,9 +25,7 @@ pub mod custom_prompts;
mod environment_context;
pub mod error;
pub mod exec;
mod exec_command;
pub mod exec_env;
pub mod executor;
pub mod features;
mod flags;
pub mod git_info;
@@ -38,6 +36,7 @@ mod mcp_tool_call;
mod message_history;
mod model_provider_info;
pub mod parse_command;
pub mod sandboxing;
pub mod token_data;
mod truncate;
mod unified_exec;
@@ -59,7 +58,6 @@ pub use auth::CodexAuth;
pub mod default_client;
pub mod model_family;
mod openai_model_info;
mod openai_tools;
pub mod project_doc;
mod rollout;
pub(crate) mod safety;

View File

@@ -1 +0,0 @@
pub use crate::tools::spec::*;

View File

@@ -1,4 +1,3 @@
use std::collections::HashSet;
use std::path::Component;
use std::path::Path;
use std::path::PathBuf;
@@ -8,8 +7,6 @@ use codex_apply_patch::ApplyPatchFileChange;
use crate::exec::SandboxType;
use crate::command_safety::is_dangerous_command::command_might_be_dangerous;
use crate::command_safety::is_safe_command::is_known_safe_command;
use crate::protocol::AskForApproval;
use crate::protocol::SandboxPolicy;
@@ -48,30 +45,29 @@ pub fn assess_patch_safety(
}
}
// Even though the patch *appears* to be constrained to writable paths, it
// is possible that paths in the patch are hard links to files outside the
// writable roots, so we should still run `apply_patch` in a sandbox in that
// case.
// Even though the patch appears to be constrained to writable paths, it is
// possible that paths in the patch are hard links to files outside the
// writable roots, so we should still run `apply_patch` in a sandbox in that case.
if is_write_patch_constrained_to_writable_paths(action, sandbox_policy, cwd)
|| policy == AskForApproval::OnFailure
{
// Only autoapprove when we can actually enforce a sandbox. Otherwise
// fall back to asking the user because the patch may touch arbitrary
// paths outside the project.
match get_platform_sandbox() {
Some(sandbox_type) => SafetyCheck::AutoApprove {
sandbox_type,
if matches!(sandbox_policy, SandboxPolicy::DangerFullAccess) {
// DangerFullAccess is intended to bypass sandboxing entirely.
SafetyCheck::AutoApprove {
sandbox_type: SandboxType::None,
user_explicitly_approved: false,
},
None if sandbox_policy == &SandboxPolicy::DangerFullAccess => {
// If the user has explicitly requested DangerFullAccess, then
// we can auto-approve even without a sandbox.
SafetyCheck::AutoApprove {
sandbox_type: SandboxType::None,
user_explicitly_approved: false,
}
}
None => SafetyCheck::AskUser,
} else {
// Only autoapprove when we can actually enforce a sandbox. Otherwise
// fall back to asking the user because the patch may touch arbitrary
// paths outside the project.
match get_platform_sandbox() {
Some(sandbox_type) => SafetyCheck::AutoApprove {
sandbox_type,
user_explicitly_approved: false,
},
None => SafetyCheck::AskUser,
}
}
} else if policy == AskForApproval::Never {
SafetyCheck::Reject {
@@ -83,124 +79,6 @@ pub fn assess_patch_safety(
}
}
/// For a command to be run _without_ a sandbox, one of the following must be
/// true:
///
/// - the user has explicitly approved the command
/// - the command is on the "known safe" list
/// - `DangerFullAccess` was specified and `UnlessTrusted` was not
pub fn assess_command_safety(
command: &[String],
approval_policy: AskForApproval,
sandbox_policy: &SandboxPolicy,
approved: &HashSet<Vec<String>>,
with_escalated_permissions: bool,
) -> SafetyCheck {
// Some commands look dangerous. Even if they are run inside a sandbox,
// unless the user has explicitly approved them, we should ask,
// or reject if the approval_policy tells us not to ask.
if command_might_be_dangerous(command) && !approved.contains(command) {
if approval_policy == AskForApproval::Never {
return SafetyCheck::Reject {
reason: "dangerous command detected; rejected by user approval settings"
.to_string(),
};
}
return SafetyCheck::AskUser;
}
// A command is "trusted" because either:
// - it belongs to a set of commands we consider "safe" by default, or
// - the user has explicitly approved the command for this session
//
// Currently, whether a command is "trusted" is a simple boolean, but we
// should include more metadata on this command test to indicate whether it
// should be run inside a sandbox or not. (This could be something the user
// defines as part of `execpolicy`.)
//
// For example, when `is_known_safe_command(command)` returns `true`, it
// would probably be fine to run the command in a sandbox, but when
// `approved.contains(command)` is `true`, the user may have approved it for
// the session _because_ they know it needs to run outside a sandbox.
if is_known_safe_command(command) || approved.contains(command) {
let user_explicitly_approved = approved.contains(command);
return SafetyCheck::AutoApprove {
sandbox_type: SandboxType::None,
user_explicitly_approved,
};
}
assess_safety_for_untrusted_command(approval_policy, sandbox_policy, with_escalated_permissions)
}
pub(crate) fn assess_safety_for_untrusted_command(
approval_policy: AskForApproval,
sandbox_policy: &SandboxPolicy,
with_escalated_permissions: bool,
) -> SafetyCheck {
use AskForApproval::*;
use SandboxPolicy::*;
match (approval_policy, sandbox_policy) {
(UnlessTrusted, _) => {
// Even though the user may have opted into DangerFullAccess,
// they also requested that we ask for approval for untrusted
// commands.
SafetyCheck::AskUser
}
(OnFailure, DangerFullAccess)
| (Never, DangerFullAccess)
| (OnRequest, DangerFullAccess) => SafetyCheck::AutoApprove {
sandbox_type: SandboxType::None,
user_explicitly_approved: false,
},
(OnRequest, ReadOnly) | (OnRequest, WorkspaceWrite { .. }) => {
if with_escalated_permissions {
SafetyCheck::AskUser
} else {
match get_platform_sandbox() {
Some(sandbox_type) => SafetyCheck::AutoApprove {
sandbox_type,
user_explicitly_approved: false,
},
// Fall back to asking since the command is untrusted and
// we do not have a sandbox available
None => SafetyCheck::AskUser,
}
}
}
(Never, ReadOnly)
| (Never, WorkspaceWrite { .. })
| (OnFailure, ReadOnly)
| (OnFailure, WorkspaceWrite { .. }) => {
match get_platform_sandbox() {
Some(sandbox_type) => SafetyCheck::AutoApprove {
sandbox_type,
user_explicitly_approved: false,
},
None => {
if matches!(approval_policy, OnFailure) {
// Since the command is not trusted, even though the
// user has requested to only ask for approval on
// failure, we will ask the user because no sandbox is
// available.
SafetyCheck::AskUser
} else {
// We are in non-interactive mode and lack approval, so
// all we can do is reject the command.
SafetyCheck::Reject {
reason: "auto-rejected because command is not on trusted list"
.to_string(),
}
}
}
}
}
}
}
pub fn get_platform_sandbox() -> Option<SandboxType> {
if cfg!(target_os = "macos") {
Some(SandboxType::MacosSeatbelt)
@@ -339,101 +217,4 @@ mod tests {
&cwd,
));
}
#[test]
fn test_request_escalated_privileges() {
// Should not be a trusted command
let command = vec!["git commit".to_string()];
let approval_policy = AskForApproval::OnRequest;
let sandbox_policy = SandboxPolicy::ReadOnly;
let approved: HashSet<Vec<String>> = HashSet::new();
let request_escalated_privileges = true;
let safety_check = assess_command_safety(
&command,
approval_policy,
&sandbox_policy,
&approved,
request_escalated_privileges,
);
assert_eq!(safety_check, SafetyCheck::AskUser);
}
#[test]
fn dangerous_command_allowed_if_explicitly_approved() {
let command = vec!["git".to_string(), "reset".to_string(), "--hard".to_string()];
let approval_policy = AskForApproval::OnRequest;
let sandbox_policy = SandboxPolicy::ReadOnly;
let mut approved: HashSet<Vec<String>> = HashSet::new();
approved.insert(command.clone());
let request_escalated_privileges = false;
let safety_check = assess_command_safety(
&command,
approval_policy,
&sandbox_policy,
&approved,
request_escalated_privileges,
);
assert_eq!(
safety_check,
SafetyCheck::AutoApprove {
sandbox_type: SandboxType::None,
user_explicitly_approved: true,
}
);
}
#[test]
fn dangerous_command_not_allowed_if_not_explicitly_approved() {
let command = vec!["git".to_string(), "reset".to_string(), "--hard".to_string()];
let approval_policy = AskForApproval::Never;
let sandbox_policy = SandboxPolicy::ReadOnly;
let approved: HashSet<Vec<String>> = HashSet::new();
let request_escalated_privileges = false;
let safety_check = assess_command_safety(
&command,
approval_policy,
&sandbox_policy,
&approved,
request_escalated_privileges,
);
assert_eq!(
safety_check,
SafetyCheck::Reject {
reason: "dangerous command detected; rejected by user approval settings"
.to_string(),
}
);
}
#[test]
fn test_request_escalated_privileges_no_sandbox_fallback() {
let command = vec!["git".to_string(), "commit".to_string()];
let approval_policy = AskForApproval::OnRequest;
let sandbox_policy = SandboxPolicy::ReadOnly;
let approved: HashSet<Vec<String>> = HashSet::new();
let request_escalated_privileges = false;
let safety_check = assess_command_safety(
&command,
approval_policy,
&sandbox_policy,
&approved,
request_escalated_privileges,
);
let expected = match get_platform_sandbox() {
Some(sandbox_type) => SafetyCheck::AutoApprove {
sandbox_type,
user_explicitly_approved: false,
},
None => SafetyCheck::AskUser,
};
assert_eq!(safety_check, expected);
}
}

View File

@@ -0,0 +1,170 @@
/*
Module: sandboxing
Build platform wrappers and produce ExecEnv for execution. Owns lowlevel
sandbox placement and transformation of portable CommandSpec into a
readytospawn environment.
*/
use crate::exec::ExecToolCallOutput;
use crate::exec::SandboxType;
use crate::exec::StdoutStream;
use crate::exec::execute_exec_env;
use crate::landlock::create_linux_sandbox_command_args;
use crate::protocol::SandboxPolicy;
use crate::seatbelt::MACOS_PATH_TO_SEATBELT_EXECUTABLE;
use crate::seatbelt::create_seatbelt_command_args;
use crate::spawn::CODEX_SANDBOX_ENV_VAR;
use crate::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
use crate::tools::sandboxing::SandboxablePreference;
use std::collections::HashMap;
use std::path::Path;
use std::path::PathBuf;
#[derive(Clone, Debug)]
pub struct CommandSpec {
pub program: String,
pub args: Vec<String>,
pub cwd: PathBuf,
pub env: HashMap<String, String>,
pub timeout_ms: Option<u64>,
pub with_escalated_permissions: Option<bool>,
pub justification: Option<String>,
}
#[derive(Clone, Debug)]
pub struct ExecEnv {
pub command: Vec<String>,
pub cwd: PathBuf,
pub env: HashMap<String, String>,
pub timeout_ms: Option<u64>,
pub sandbox: SandboxType,
pub with_escalated_permissions: Option<bool>,
pub justification: Option<String>,
pub arg0: Option<String>,
}
pub enum SandboxPreference {
Auto,
Require,
Forbid,
}
#[derive(Debug, thiserror::Error)]
pub(crate) enum SandboxTransformError {
#[error("missing codex-linux-sandbox executable path")]
MissingLinuxSandboxExecutable,
}
#[derive(Default)]
pub struct SandboxManager;
impl SandboxManager {
pub fn new() -> Self {
Self
}
pub(crate) fn select_initial(
&self,
policy: &SandboxPolicy,
pref: SandboxablePreference,
) -> SandboxType {
match pref {
SandboxablePreference::Forbid => SandboxType::None,
SandboxablePreference::Require => {
#[cfg(target_os = "macos")]
{
return SandboxType::MacosSeatbelt;
}
#[cfg(target_os = "linux")]
{
return SandboxType::LinuxSeccomp;
}
#[allow(unreachable_code)]
SandboxType::None
}
SandboxablePreference::Auto => match policy {
SandboxPolicy::DangerFullAccess => SandboxType::None,
#[cfg(target_os = "macos")]
_ => SandboxType::MacosSeatbelt,
#[cfg(target_os = "linux")]
_ => SandboxType::LinuxSeccomp,
#[cfg(not(any(target_os = "macos", target_os = "linux")))]
_ => SandboxType::None,
},
}
}
pub(crate) fn transform(
&self,
spec: &CommandSpec,
policy: &SandboxPolicy,
sandbox: SandboxType,
sandbox_policy_cwd: &Path,
codex_linux_sandbox_exe: Option<&PathBuf>,
) -> Result<ExecEnv, SandboxTransformError> {
let mut env = spec.env.clone();
if !policy.has_full_network_access() {
env.insert(
CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR.to_string(),
"1".to_string(),
);
}
let mut command = Vec::with_capacity(1 + spec.args.len());
command.push(spec.program.clone());
command.extend(spec.args.iter().cloned());
let (command, sandbox_env, arg0_override) = match sandbox {
SandboxType::None => (command, HashMap::new(), None),
SandboxType::MacosSeatbelt => {
let mut seatbelt_env = HashMap::new();
seatbelt_env.insert(CODEX_SANDBOX_ENV_VAR.to_string(), "seatbelt".to_string());
let mut args =
create_seatbelt_command_args(command.clone(), policy, sandbox_policy_cwd);
let mut full_command = Vec::with_capacity(1 + args.len());
full_command.push(MACOS_PATH_TO_SEATBELT_EXECUTABLE.to_string());
full_command.append(&mut args);
(full_command, seatbelt_env, None)
}
SandboxType::LinuxSeccomp => {
let exe = codex_linux_sandbox_exe
.ok_or(SandboxTransformError::MissingLinuxSandboxExecutable)?;
let mut args =
create_linux_sandbox_command_args(command.clone(), policy, sandbox_policy_cwd);
let mut full_command = Vec::with_capacity(1 + args.len());
full_command.push(exe.to_string_lossy().to_string());
full_command.append(&mut args);
(
full_command,
HashMap::new(),
Some("codex-linux-sandbox".to_string()),
)
}
};
env.extend(sandbox_env);
Ok(ExecEnv {
command,
cwd: spec.cwd.clone(),
env,
timeout_ms: spec.timeout_ms,
sandbox,
with_escalated_permissions: spec.with_escalated_permissions,
justification: spec.justification.clone(),
arg0: arg0_override,
})
}
pub fn denied(&self, sandbox: SandboxType, out: &ExecToolCallOutput) -> bool {
crate::exec::is_likely_sandbox_denied(sandbox, out)
}
}
pub async fn execute_env(
env: &ExecEnv,
policy: &SandboxPolicy,
stdout_stream: Option<StdoutStream>,
) -> crate::error::Result<ExecToolCallOutput> {
execute_exec_env(env.clone(), policy, stdout_stream).await
}

View File

@@ -14,7 +14,7 @@ const MACOS_SEATBELT_BASE_POLICY: &str = include_str!("seatbelt_base_policy.sbpl
/// to defend against an attacker trying to inject a malicious version on the
/// PATH. If /usr/bin/sandbox-exec has been tampered with, then the attacker
/// already has root access.
const MACOS_PATH_TO_SEATBELT_EXECUTABLE: &str = "/usr/bin/sandbox-exec";
pub(crate) const MACOS_PATH_TO_SEATBELT_EXECUTABLE: &str = "/usr/bin/sandbox-exec";
pub async fn spawn_command_under_seatbelt(
command: Vec<String>,
@@ -39,7 +39,7 @@ pub async fn spawn_command_under_seatbelt(
.await
}
fn create_seatbelt_command_args(
pub(crate) fn create_seatbelt_command_args(
command: Vec<String>,
sandbox_policy: &SandboxPolicy,
sandbox_policy_cwd: &Path,

View File

@@ -347,6 +347,7 @@ mod tests {
)]),
with_escalated_permissions: None,
justification: None,
arg0: None,
},
SandboxType::None,
&SandboxPolicy::DangerFullAccess,
@@ -455,6 +456,7 @@ mod macos_tests {
)]),
with_escalated_permissions: None,
justification: None,
arg0: None,
},
SandboxType::None,
&SandboxPolicy::DangerFullAccess,

View File

@@ -2,9 +2,8 @@ use std::sync::Arc;
use crate::AuthManager;
use crate::RolloutRecorder;
use crate::exec_command::ExecSessionManager;
use crate::executor::Executor;
use crate::mcp_connection_manager::McpConnectionManager;
use crate::tools::sandboxing::ApprovalStore;
use crate::unified_exec::UnifiedExecSessionManager;
use crate::user_notification::UserNotifier;
use codex_otel::otel_event_manager::OtelEventManager;
@@ -12,13 +11,12 @@ use tokio::sync::Mutex;
pub(crate) struct SessionServices {
pub(crate) mcp_connection_manager: McpConnectionManager,
pub(crate) session_manager: ExecSessionManager,
pub(crate) unified_exec_manager: UnifiedExecSessionManager,
pub(crate) notifier: UserNotifier,
pub(crate) rollout: Mutex<Option<RolloutRecorder>>,
pub(crate) user_shell: crate::shell::Shell,
pub(crate) show_raw_agent_reasoning: bool,
pub(crate) executor: Executor,
pub(crate) auth_manager: Arc<AuthManager>,
pub(crate) otel_event_manager: OtelEventManager,
pub(crate) tool_approvals: Mutex<ApprovalStore>,
}

View File

@@ -232,6 +232,7 @@ mod tests {
}
#[derive(Clone, Debug)]
#[allow(dead_code)]
pub(crate) struct ExecCommandContext {
pub(crate) sub_id: String,
pub(crate) call_id: String,
@@ -243,6 +244,7 @@ pub(crate) struct ExecCommandContext {
}
#[derive(Clone, Debug)]
#[allow(dead_code)]
pub(crate) struct ApplyPatchCommandContext {
pub(crate) user_explicitly_approved_this_action: bool,
pub(crate) changes: HashMap<PathBuf, FileChange>,

View File

@@ -0,0 +1,235 @@
use crate::codex::Session;
use crate::exec::ExecToolCallOutput;
use crate::parse_command::parse_command;
use crate::protocol::Event;
use crate::protocol::EventMsg;
use crate::protocol::ExecCommandBeginEvent;
use crate::protocol::ExecCommandEndEvent;
use crate::protocol::FileChange;
use crate::protocol::PatchApplyBeginEvent;
use crate::protocol::PatchApplyEndEvent;
use crate::protocol::TurnDiffEvent;
use crate::tools::context::SharedTurnDiffTracker;
use std::collections::HashMap;
use std::path::PathBuf;
use std::time::Duration;
use super::format_exec_output;
use super::format_exec_output_str;
#[derive(Clone, Copy)]
pub(crate) struct ToolEventCtx<'a> {
pub session: &'a Session,
pub sub_id: &'a str,
pub call_id: &'a str,
pub turn_diff_tracker: Option<&'a SharedTurnDiffTracker>,
}
impl<'a> ToolEventCtx<'a> {
pub fn new(
session: &'a Session,
sub_id: &'a str,
call_id: &'a str,
turn_diff_tracker: Option<&'a SharedTurnDiffTracker>,
) -> Self {
Self {
session,
sub_id,
call_id,
turn_diff_tracker,
}
}
}
pub(crate) enum ToolEventStage {
Begin,
Success(ExecToolCallOutput),
Failure(ToolEventFailure),
}
pub(crate) enum ToolEventFailure {
Output(ExecToolCallOutput),
Message(String),
}
// Concrete, allocation-free emitter: avoid trait objects and boxed futures.
pub(crate) enum ToolEmitter {
Shell {
command: Vec<String>,
cwd: PathBuf,
},
ApplyPatch {
changes: HashMap<PathBuf, FileChange>,
auto_approved: bool,
},
}
impl ToolEmitter {
pub fn shell(command: Vec<String>, cwd: PathBuf) -> Self {
Self::Shell { command, cwd }
}
pub fn apply_patch(changes: HashMap<PathBuf, FileChange>, auto_approved: bool) -> Self {
Self::ApplyPatch {
changes,
auto_approved,
}
}
pub async fn emit(&self, ctx: ToolEventCtx<'_>, stage: ToolEventStage) {
match (self, stage) {
(Self::Shell { command, cwd }, ToolEventStage::Begin) => {
ctx.session
.send_event(Event {
id: ctx.sub_id.to_string(),
msg: EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
call_id: ctx.call_id.to_string(),
command: command.clone(),
cwd: cwd.clone(),
parsed_cmd: parse_command(command),
}),
})
.await;
}
(Self::Shell { .. }, ToolEventStage::Success(output)) => {
emit_exec_end(
ctx,
output.stdout.text.clone(),
output.stderr.text.clone(),
output.aggregated_output.text.clone(),
output.exit_code,
output.duration,
format_exec_output_str(&output),
)
.await;
}
(Self::Shell { .. }, ToolEventStage::Failure(ToolEventFailure::Output(output))) => {
emit_exec_end(
ctx,
output.stdout.text.clone(),
output.stderr.text.clone(),
output.aggregated_output.text.clone(),
output.exit_code,
output.duration,
format_exec_output_str(&output),
)
.await;
}
(Self::Shell { .. }, ToolEventStage::Failure(ToolEventFailure::Message(message))) => {
emit_exec_end(
ctx,
String::new(),
(*message).to_string(),
(*message).to_string(),
-1,
Duration::ZERO,
format_exec_output(&message),
)
.await;
}
(
Self::ApplyPatch {
changes,
auto_approved,
},
ToolEventStage::Begin,
) => {
if let Some(tracker) = ctx.turn_diff_tracker {
let mut guard = tracker.lock().await;
guard.on_patch_begin(changes);
}
ctx.session
.send_event(Event {
id: ctx.sub_id.to_string(),
msg: EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
call_id: ctx.call_id.to_string(),
auto_approved: *auto_approved,
changes: changes.clone(),
}),
})
.await;
}
(Self::ApplyPatch { .. }, ToolEventStage::Success(output)) => {
emit_patch_end(
ctx,
output.stdout.text.clone(),
output.stderr.text.clone(),
output.exit_code == 0,
)
.await;
}
(
Self::ApplyPatch { .. },
ToolEventStage::Failure(ToolEventFailure::Output(output)),
) => {
emit_patch_end(
ctx,
output.stdout.text.clone(),
output.stderr.text.clone(),
output.exit_code == 0,
)
.await;
}
(
Self::ApplyPatch { .. },
ToolEventStage::Failure(ToolEventFailure::Message(message)),
) => {
emit_patch_end(ctx, String::new(), (*message).to_string(), false).await;
}
}
}
}
async fn emit_exec_end(
ctx: ToolEventCtx<'_>,
stdout: String,
stderr: String,
aggregated_output: String,
exit_code: i32,
duration: Duration,
formatted_output: String,
) {
ctx.session
.send_event(Event {
id: ctx.sub_id.to_string(),
msg: EventMsg::ExecCommandEnd(ExecCommandEndEvent {
call_id: ctx.call_id.to_string(),
stdout,
stderr,
aggregated_output,
exit_code,
duration,
formatted_output,
}),
})
.await;
}
async fn emit_patch_end(ctx: ToolEventCtx<'_>, stdout: String, stderr: String, success: bool) {
ctx.session
.send_event(Event {
id: ctx.sub_id.to_string(),
msg: EventMsg::PatchApplyEnd(PatchApplyEndEvent {
call_id: ctx.call_id.to_string(),
stdout,
stderr,
success,
}),
})
.await;
if let Some(tracker) = ctx.turn_diff_tracker {
let unified_diff = {
let mut guard = tracker.lock().await;
guard.get_unified_diff()
};
if let Ok(Some(unified_diff)) = unified_diff {
ctx.session
.send_event(Event {
id: ctx.sub_id.to_string(),
msg: EventMsg::TurnDiff(TurnDiffEvent { unified_diff }),
})
.await;
}
}
}

View File

@@ -8,7 +8,6 @@ use crate::client_common::tools::ResponsesApiTool;
use crate::client_common::tools::ToolSpec;
use crate::exec::ExecParams;
use crate::function_tool::FunctionCallError;
use crate::openai_tools::JsonSchema;
use crate::tools::context::ToolInvocation;
use crate::tools::context::ToolOutput;
use crate::tools::context::ToolPayload;
@@ -16,6 +15,7 @@ use crate::tools::handle_container_exec_with_params;
use crate::tools::registry::ToolHandler;
use crate::tools::registry::ToolKind;
use crate::tools::spec::ApplyPatchToolArgs;
use crate::tools::spec::JsonSchema;
use async_trait::async_trait;
use serde::Deserialize;
use serde::Serialize;
@@ -72,6 +72,7 @@ impl ToolHandler for ApplyPatchHandler {
env: HashMap::new(),
with_escalated_permissions: None,
justification: None,
arg0: None,
};
let content = handle_container_exec_with_params(

View File

@@ -1,68 +0,0 @@
use async_trait::async_trait;
use crate::exec_command::EXEC_COMMAND_TOOL_NAME;
use crate::exec_command::ExecCommandParams;
use crate::exec_command::WRITE_STDIN_TOOL_NAME;
use crate::exec_command::WriteStdinParams;
use crate::function_tool::FunctionCallError;
use crate::tools::context::ToolInvocation;
use crate::tools::context::ToolOutput;
use crate::tools::context::ToolPayload;
use crate::tools::registry::ToolHandler;
use crate::tools::registry::ToolKind;
pub struct ExecStreamHandler;
#[async_trait]
impl ToolHandler for ExecStreamHandler {
fn kind(&self) -> ToolKind {
ToolKind::Function
}
async fn handle(&self, invocation: ToolInvocation) -> Result<ToolOutput, FunctionCallError> {
let ToolInvocation {
session,
tool_name,
payload,
..
} = invocation;
let arguments = match payload {
ToolPayload::Function { arguments } => arguments,
_ => {
return Err(FunctionCallError::RespondToModel(
"exec_stream handler received unsupported payload".to_string(),
));
}
};
let content = match tool_name.as_str() {
EXEC_COMMAND_TOOL_NAME => {
let params: ExecCommandParams = serde_json::from_str(&arguments).map_err(|e| {
FunctionCallError::RespondToModel(format!(
"failed to parse function arguments: {e:?}"
))
})?;
session.handle_exec_command_tool(params).await?
}
WRITE_STDIN_TOOL_NAME => {
let params: WriteStdinParams = serde_json::from_str(&arguments).map_err(|e| {
FunctionCallError::RespondToModel(format!(
"failed to parse function arguments: {e:?}"
))
})?;
session.handle_write_stdin_tool(params).await?
}
_ => {
return Err(FunctionCallError::RespondToModel(format!(
"exec_stream handler does not support tool {tool_name}"
)));
}
};
Ok(ToolOutput::Function {
content,
success: Some(true),
})
}
}

View File

@@ -1,5 +1,4 @@
pub mod apply_patch;
mod exec_stream;
mod grep_files;
mod list_dir;
mod mcp;
@@ -14,7 +13,6 @@ mod view_image;
pub use plan::PLAN_TOOL;
pub use apply_patch::ApplyPatchHandler;
pub use exec_stream::ExecStreamHandler;
pub use grep_files::GrepFilesHandler;
pub use list_dir::ListDirHandler;
pub use mcp::McpHandler;

View File

@@ -2,12 +2,12 @@ use crate::client_common::tools::ResponsesApiTool;
use crate::client_common::tools::ToolSpec;
use crate::codex::Session;
use crate::function_tool::FunctionCallError;
use crate::openai_tools::JsonSchema;
use crate::tools::context::ToolInvocation;
use crate::tools::context::ToolOutput;
use crate::tools::context::ToolPayload;
use crate::tools::registry::ToolHandler;
use crate::tools::registry::ToolKind;
use crate::tools::spec::JsonSchema;
use async_trait::async_trait;
use codex_protocol::plan_tool::UpdatePlanArgs;
use codex_protocol::protocol::Event;

View File

@@ -24,6 +24,7 @@ impl ShellHandler {
env: create_env(&turn_context.shell_environment_policy),
with_escalated_permissions: params.with_escalated_permissions,
justification: params.justification,
arg0: None,
}
}
}

View File

@@ -35,7 +35,13 @@ impl ToolHandler for UnifiedExecHandler {
async fn handle(&self, invocation: ToolInvocation) -> Result<ToolOutput, FunctionCallError> {
let ToolInvocation {
session, payload, ..
session,
turn,
sub_id,
call_id,
tool_name: _tool_name,
payload,
..
} = invocation;
let args = match payload {
@@ -73,13 +79,23 @@ impl ToolHandler for UnifiedExecHandler {
};
let request = UnifiedExecRequest {
session_id: parsed_session_id,
input_chunks: &input,
timeout_ms,
};
let value = session
.run_unified_exec_request(request)
.services
.unified_exec_manager
.handle_request(
request,
crate::unified_exec::UnifiedExecContext {
session: &session,
turn: turn.as_ref(),
sub_id: &sub_id,
call_id: &call_id,
session_id: parsed_session_id,
},
)
.await
.map_err(|err| {
FunctionCallError::RespondToModel(format!("unified exec failed: {err:?}"))

View File

@@ -1,12 +1,15 @@
pub mod context;
pub mod events;
pub(crate) mod handlers;
pub mod orchestrator;
pub mod parallel;
pub mod registry;
pub mod router;
pub mod runtimes;
pub mod sandboxing;
pub mod spec;
use crate::apply_patch;
use crate::apply_patch::ApplyPatchExec;
use crate::apply_patch::InternalApplyPatchInvocation;
use crate::apply_patch::convert_apply_patch_to_protocol;
use crate::codex::Session;
@@ -15,14 +18,19 @@ use crate::error::CodexErr;
use crate::error::SandboxErr;
use crate::exec::ExecParams;
use crate::exec::ExecToolCallOutput;
use crate::exec::StdoutStream;
use crate::executor::ExecutionMode;
use crate::executor::errors::ExecError;
use crate::executor::linkers::PreparedExec;
use crate::function_tool::FunctionCallError;
use crate::tools::context::ApplyPatchCommandContext;
use crate::tools::context::ExecCommandContext;
use crate::tools::context::SharedTurnDiffTracker;
use crate::tools::events::ToolEmitter;
use crate::tools::events::ToolEventCtx;
use crate::tools::events::ToolEventFailure;
use crate::tools::events::ToolEventStage;
use crate::tools::orchestrator::ToolOrchestrator;
use crate::tools::runtimes::apply_patch::ApplyPatchRequest;
use crate::tools::runtimes::apply_patch::ApplyPatchRuntime;
use crate::tools::runtimes::shell::ShellRequest;
use crate::tools::runtimes::shell::ShellRuntime;
use crate::tools::sandboxing::ToolCtx;
use crate::tools::sandboxing::ToolError;
use codex_apply_patch::MaybeApplyPatchVerified;
use codex_apply_patch::maybe_parse_apply_patch_verified;
use codex_protocol::protocol::AskForApproval;
@@ -56,7 +64,7 @@ pub(crate) async fn handle_container_exec_with_params(
sub_id: String,
call_id: String,
) -> Result<String, FunctionCallError> {
let otel_event_manager = turn_context.client.get_otel_event_manager();
let _otel_event_manager = turn_context.client.get_otel_event_manager();
if params.with_escalated_permissions.unwrap_or(false)
&& !matches!(turn_context.approval_policy, AskForApproval::OnRequest)
@@ -100,86 +108,142 @@ pub(crate) async fn handle_container_exec_with_params(
MaybeApplyPatchVerified::NotApplyPatch => None,
};
let command_for_display = if let Some(exec) = apply_patch_exec.as_ref() {
vec!["apply_patch".to_string(), exec.action.patch.clone()]
} else {
params.command.clone()
};
let exec_command_context = ExecCommandContext {
sub_id: sub_id.clone(),
call_id: call_id.clone(),
command_for_display: command_for_display.clone(),
cwd: params.cwd.clone(),
apply_patch: apply_patch_exec.as_ref().map(
|ApplyPatchExec {
action,
user_explicitly_approved_this_action,
}| ApplyPatchCommandContext {
user_explicitly_approved_this_action: *user_explicitly_approved_this_action,
changes: convert_apply_patch_to_protocol(action),
},
let (event_emitter, diff_opt) = match apply_patch_exec.as_ref() {
Some(exec) => (
ToolEmitter::apply_patch(
convert_apply_patch_to_protocol(&exec.action),
!exec.user_explicitly_approved_this_action,
),
Some(&turn_diff_tracker),
),
None => (
ToolEmitter::shell(params.command.clone(), params.cwd.clone()),
None,
),
tool_name: tool_name.to_string(),
otel_event_manager,
};
let mode = match apply_patch_exec {
Some(exec) => ExecutionMode::ApplyPatch(exec),
None => ExecutionMode::Shell,
};
let event_ctx = ToolEventCtx::new(sess.as_ref(), &sub_id, &call_id, diff_opt);
event_emitter.emit(event_ctx, ToolEventStage::Begin).await;
sess.services.executor.update_environment(
turn_context.sandbox_policy.clone(),
turn_context.cwd.clone(),
);
// Build runtime contexts only when needed (shell/apply_patch below).
let prepared_exec = PreparedExec::new(
exec_command_context,
params,
command_for_display,
mode,
Some(StdoutStream {
if let Some(exec) = apply_patch_exec {
// Route apply_patch execution through the new orchestrator/runtime.
let req = ApplyPatchRequest {
patch: exec.action.patch.clone(),
cwd: params.cwd.clone(),
timeout_ms: params.timeout_ms,
user_explicitly_approved: exec.user_explicitly_approved_this_action,
codex_exe: turn_context.codex_linux_sandbox_exe.clone(),
};
let mut orchestrator = ToolOrchestrator::new();
let mut runtime = ApplyPatchRuntime::new();
let tool_ctx = ToolCtx {
session: sess.as_ref(),
sub_id: sub_id.clone(),
call_id: call_id.clone(),
tx_event: sess.get_tx_event(),
}),
turn_context.shell_environment_policy.use_profile,
);
tool_name: tool_name.to_string(),
};
let output_result = sess
.run_exec_with_events(
turn_diff_tracker.clone(),
prepared_exec,
turn_context.approval_policy,
)
.await;
let out = orchestrator
.run(
&mut runtime,
&req,
&tool_ctx,
&turn_context,
turn_context.approval_policy,
)
.await;
// always make sure to truncate the output if its length isn't controlled.
match output_result {
handle_exec_outcome(&event_emitter, event_ctx, out).await
} else {
// Route shell execution through the new orchestrator/runtime.
let req = ShellRequest {
command: params.command.clone(),
cwd: params.cwd.clone(),
timeout_ms: params.timeout_ms,
env: params.env.clone(),
with_escalated_permissions: params.with_escalated_permissions,
justification: params.justification.clone(),
};
let mut orchestrator = ToolOrchestrator::new();
let mut runtime = ShellRuntime::new();
let tool_ctx = ToolCtx {
session: sess.as_ref(),
sub_id: sub_id.clone(),
call_id: call_id.clone(),
tool_name: tool_name.to_string(),
};
let out = orchestrator
.run(
&mut runtime,
&req,
&tool_ctx,
&turn_context,
turn_context.approval_policy,
)
.await;
handle_exec_outcome(&event_emitter, event_ctx, out).await
}
}
async fn handle_exec_outcome(
event_emitter: &ToolEmitter,
event_ctx: ToolEventCtx<'_>,
out: Result<ExecToolCallOutput, ToolError>,
) -> Result<String, FunctionCallError> {
let event;
let result = match out {
Ok(output) => {
let ExecToolCallOutput { exit_code, .. } = &output;
let content = format_exec_output_apply_patch(&output);
if *exit_code == 0 {
let content = format_exec_output_for_model(&output);
let exit_code = output.exit_code;
event = ToolEventStage::Success(output);
if exit_code == 0 {
Ok(content)
} else {
Err(FunctionCallError::RespondToModel(content))
}
}
Err(ExecError::Function(err)) => Err(truncate_function_error(err)),
Err(ExecError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output }))) => Err(
FunctionCallError::RespondToModel(format_exec_output_apply_patch(&output)),
),
Err(ExecError::Codex(err)) => {
Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output })))
| Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied { output }))) => {
let response = format_exec_output_for_model(&output);
event = ToolEventStage::Failure(ToolEventFailure::Output(*output));
Err(FunctionCallError::RespondToModel(response))
}
Err(ToolError::Codex(err)) => {
let message = format!("execution error: {err:?}");
let response = format_exec_output(&message);
event = ToolEventStage::Failure(ToolEventFailure::Message(message));
Err(FunctionCallError::RespondToModel(format_exec_output(
&message,
&response,
)))
}
}
Err(ToolError::Rejected(msg)) | Err(ToolError::SandboxDenied(msg)) => {
// Normalize common rejection messages for exec tools so tests and
// users see a clear, consistent phrase.
let normalized = if msg == "rejected by user" {
"exec command rejected by user".to_string()
} else {
msg
};
let response = format_exec_output(&normalized);
event = ToolEventStage::Failure(ToolEventFailure::Message(normalized));
Err(FunctionCallError::RespondToModel(format_exec_output(
&response,
)))
}
};
event_emitter.emit(event_ctx, event).await;
result
}
pub fn format_exec_output_apply_patch(exec_output: &ExecToolCallOutput) -> String {
/// Format the combined exec output for sending back to the model.
/// Includes exit code and duration metadata; truncates large bodies safely.
pub fn format_exec_output_for_model(exec_output: &ExecToolCallOutput) -> String {
let ExecToolCallOutput {
exit_code,
duration,
@@ -233,18 +297,7 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
format_exec_output(content)
}
fn truncate_function_error(err: FunctionCallError) -> FunctionCallError {
match err {
FunctionCallError::RespondToModel(msg) => {
FunctionCallError::RespondToModel(format_exec_output(&msg))
}
FunctionCallError::Denied(msg) => FunctionCallError::Denied(format_exec_output(&msg)),
FunctionCallError::Fatal(msg) => FunctionCallError::Fatal(format_exec_output(&msg)),
other => other,
}
}
fn format_exec_output(content: &str) -> String {
pub(super) fn format_exec_output(content: &str) -> String {
// Head+tail truncation for the model: show the beginning and end with an elision.
// Clients still receive full streams; only this formatted summary is capped.
let total_lines = content.lines().count();
@@ -315,6 +368,17 @@ mod tests {
use super::*;
use regex_lite::Regex;
fn truncate_function_error(err: FunctionCallError) -> FunctionCallError {
match err {
FunctionCallError::RespondToModel(msg) => {
FunctionCallError::RespondToModel(format_exec_output(&msg))
}
FunctionCallError::Denied(msg) => FunctionCallError::Denied(format_exec_output(&msg)),
FunctionCallError::Fatal(msg) => FunctionCallError::Fatal(format_exec_output(&msg)),
other => other,
}
}
fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usize) {
let pattern = truncated_message_pattern(line, total_lines);
let regex = Regex::new(&pattern).unwrap_or_else(|err| {

View File

@@ -0,0 +1,172 @@
/*
Module: orchestrator
Central place for approvals + sandbox selection + retry semantics. Drives a
simple sequence for any ToolRuntime: approval → select sandbox → attempt →
retry without sandbox on denial (no reapproval thanks to caching).
*/
use crate::error::CodexErr;
use crate::error::SandboxErr;
use crate::exec::ExecToolCallOutput;
use crate::sandboxing::SandboxManager;
use crate::tools::sandboxing::ApprovalCtx;
use crate::tools::sandboxing::SandboxAttempt;
use crate::tools::sandboxing::ToolCtx;
use crate::tools::sandboxing::ToolError;
use crate::tools::sandboxing::ToolRuntime;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::ReviewDecision;
pub(crate) struct ToolOrchestrator {
sandbox: SandboxManager,
}
impl ToolOrchestrator {
pub fn new() -> Self {
Self {
sandbox: SandboxManager::new(),
}
}
pub async fn run<Rq, Out, T>(
&mut self,
tool: &mut T,
req: &Rq,
tool_ctx: &ToolCtx<'_>,
turn_ctx: &crate::codex::TurnContext,
approval_policy: AskForApproval,
) -> Result<Out, ToolError>
where
T: ToolRuntime<Rq, Out>,
{
let otel = turn_ctx.client.get_otel_event_manager();
let otel_tn = &tool_ctx.tool_name;
let otel_ci = &tool_ctx.call_id;
let otel_user = codex_otel::otel_event_manager::ToolDecisionSource::User;
let otel_cfg = codex_otel::otel_event_manager::ToolDecisionSource::Config;
// 1) Approval
let needs_initial_approval =
tool.wants_initial_approval(req, approval_policy, &turn_ctx.sandbox_policy);
let mut already_approved = false;
if needs_initial_approval {
let approval_ctx = ApprovalCtx {
session: tool_ctx.session,
sub_id: &tool_ctx.sub_id,
call_id: &tool_ctx.call_id,
retry_reason: None,
};
let decision = tool.start_approval_async(req, approval_ctx).await;
otel.tool_decision(otel_tn, otel_ci, decision, otel_user.clone());
match decision {
ReviewDecision::Denied | ReviewDecision::Abort => {
return Err(ToolError::Rejected("rejected by user".to_string()));
}
ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {}
}
already_approved = true;
} else {
otel.tool_decision(otel_tn, otel_ci, ReviewDecision::Approved, otel_cfg);
}
// 2) First attempt under the selected sandbox.
let mut initial_sandbox = self
.sandbox
.select_initial(&turn_ctx.sandbox_policy, tool.sandbox_preference());
if tool.wants_escalated_first_attempt(req) {
initial_sandbox = crate::exec::SandboxType::None;
}
let initial_attempt = SandboxAttempt {
sandbox: initial_sandbox,
policy: &turn_ctx.sandbox_policy,
manager: &self.sandbox,
sandbox_cwd: &turn_ctx.cwd,
codex_linux_sandbox_exe: turn_ctx.codex_linux_sandbox_exe.as_ref(),
};
match tool.run(req, &initial_attempt, tool_ctx).await {
Ok(out) => {
// We have a successful initial result
Ok(out)
}
Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied { output }))) => {
if !tool.escalate_on_failure() {
return Err(ToolError::SandboxDenied(
"sandbox denied and no retry".to_string(),
));
}
// Under `Never`, do not retry without sandbox; surface a concise message
// derived from the actual output (platform-agnostic).
if matches!(approval_policy, AskForApproval::Never) {
let msg = build_never_denied_message_from_output(output.as_ref());
return Err(ToolError::SandboxDenied(msg));
}
// Ask for approval before retrying without sandbox.
if !tool.should_bypass_approval(approval_policy, already_approved) {
let reason_msg = build_denial_reason_from_output(output.as_ref());
let approval_ctx = ApprovalCtx {
session: tool_ctx.session,
sub_id: &tool_ctx.sub_id,
call_id: &tool_ctx.call_id,
retry_reason: Some(reason_msg),
};
let decision = tool.start_approval_async(req, approval_ctx).await;
otel.tool_decision(otel_tn, otel_ci, decision, otel_user);
match decision {
ReviewDecision::Denied | ReviewDecision::Abort => {
return Err(ToolError::Rejected("rejected by user".to_string()));
}
ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {}
}
}
let escalated_attempt = SandboxAttempt {
sandbox: crate::exec::SandboxType::None,
policy: &turn_ctx.sandbox_policy,
manager: &self.sandbox,
sandbox_cwd: &turn_ctx.cwd,
codex_linux_sandbox_exe: None,
};
// Second attempt.
(*tool).run(req, &escalated_attempt, tool_ctx).await
}
other => other,
}
}
}
fn build_never_denied_message_from_output(output: &ExecToolCallOutput) -> String {
let body = format!(
"{}\n{}\n{}",
output.stderr.text, output.stdout.text, output.aggregated_output.text
)
.to_lowercase();
let detail = if body.contains("permission denied") {
Some("Permission denied")
} else if body.contains("operation not permitted") {
Some("Operation not permitted")
} else if body.contains("read-only file system") {
Some("Read-only file system")
} else {
None
};
match detail {
Some(tag) => format!("failed in sandbox: {tag}"),
None => "failed in sandbox".to_string(),
}
}
fn build_denial_reason_from_output(_output: &ExecToolCallOutput) -> String {
// Keep approval reason terse and stable for UX/tests, but accept the
// output so we can evolve heuristics later without touching call sites.
"command failed; retry without sandbox?".to_string()
}

View File

@@ -0,0 +1,148 @@
//! Apply Patch runtime: executes verified patches under the orchestrator.
//!
//! Assumes `apply_patch` verification/approval happened upstream. Reuses that
//! decision to avoid re-prompting, builds the self-invocation command for
//! `codex --codex-run-as-apply-patch`, and runs under the current
//! `SandboxAttempt` with a minimal environment.
use crate::CODEX_APPLY_PATCH_ARG1;
use crate::exec::ExecToolCallOutput;
use crate::sandboxing::CommandSpec;
use crate::sandboxing::execute_env;
use crate::tools::sandboxing::Approvable;
use crate::tools::sandboxing::ApprovalCtx;
use crate::tools::sandboxing::SandboxAttempt;
use crate::tools::sandboxing::Sandboxable;
use crate::tools::sandboxing::SandboxablePreference;
use crate::tools::sandboxing::ToolCtx;
use crate::tools::sandboxing::ToolError;
use crate::tools::sandboxing::ToolRuntime;
use crate::tools::sandboxing::with_cached_approval;
use codex_protocol::protocol::ReviewDecision;
use futures::future::BoxFuture;
use std::collections::HashMap;
use std::path::PathBuf;
#[derive(Clone, Debug)]
pub struct ApplyPatchRequest {
pub patch: String,
pub cwd: PathBuf,
pub timeout_ms: Option<u64>,
pub user_explicitly_approved: bool,
pub codex_exe: Option<PathBuf>,
}
#[derive(Default)]
pub struct ApplyPatchRuntime;
#[derive(serde::Serialize, Clone, Debug, Eq, PartialEq, Hash)]
pub(crate) struct ApprovalKey {
patch: String,
cwd: PathBuf,
}
impl ApplyPatchRuntime {
pub fn new() -> Self {
Self
}
fn build_command_spec(req: &ApplyPatchRequest) -> Result<CommandSpec, ToolError> {
use std::env;
let exe = if let Some(path) = &req.codex_exe {
path.clone()
} else {
env::current_exe()
.map_err(|e| ToolError::Rejected(format!("failed to determine codex exe: {e}")))?
};
let program = exe.to_string_lossy().to_string();
Ok(CommandSpec {
program,
args: vec![CODEX_APPLY_PATCH_ARG1.to_string(), req.patch.clone()],
cwd: req.cwd.clone(),
timeout_ms: req.timeout_ms,
// Run apply_patch with a minimal environment for determinism and to avoid leaks.
env: HashMap::new(),
with_escalated_permissions: None,
justification: None,
})
}
fn stdout_stream(ctx: &ToolCtx<'_>) -> Option<crate::exec::StdoutStream> {
Some(crate::exec::StdoutStream {
sub_id: ctx.sub_id.clone(),
call_id: ctx.call_id.clone(),
tx_event: ctx.session.get_tx_event(),
})
}
}
impl Sandboxable for ApplyPatchRuntime {
fn sandbox_preference(&self) -> SandboxablePreference {
SandboxablePreference::Auto
}
fn escalate_on_failure(&self) -> bool {
true
}
}
impl Approvable<ApplyPatchRequest> for ApplyPatchRuntime {
type ApprovalKey = ApprovalKey;
fn approval_key(&self, req: &ApplyPatchRequest) -> Self::ApprovalKey {
ApprovalKey {
patch: req.patch.clone(),
cwd: req.cwd.clone(),
}
}
fn start_approval_async<'a>(
&'a mut self,
req: &'a ApplyPatchRequest,
ctx: ApprovalCtx<'a>,
) -> BoxFuture<'a, ReviewDecision> {
let key = self.approval_key(req);
let session = ctx.session;
let sub_id = ctx.sub_id.to_string();
let call_id = ctx.call_id.to_string();
let cwd = req.cwd.clone();
let retry_reason = ctx.retry_reason.clone();
let user_explicitly_approved = req.user_explicitly_approved;
Box::pin(async move {
with_cached_approval(&session.services, key, || async move {
if let Some(reason) = retry_reason {
session
.request_command_approval(
sub_id,
call_id,
vec!["apply_patch".to_string()],
cwd,
Some(reason),
)
.await
} else if user_explicitly_approved {
ReviewDecision::ApprovedForSession
} else {
ReviewDecision::Approved
}
})
.await
})
}
}
impl ToolRuntime<ApplyPatchRequest, ExecToolCallOutput> for ApplyPatchRuntime {
async fn run(
&mut self,
req: &ApplyPatchRequest,
attempt: &SandboxAttempt<'_>,
ctx: &ToolCtx<'_>,
) -> Result<ExecToolCallOutput, ToolError> {
let spec = Self::build_command_spec(req)?;
let env = attempt
.env_for(&spec)
.map_err(|err| ToolError::Codex(err.into()))?;
let out = execute_env(&env, attempt.policy, Self::stdout_stream(ctx))
.await
.map_err(ToolError::Codex)?;
Ok(out)
}
}

View File

@@ -0,0 +1,38 @@
/*
Module: runtimes
Concrete ToolRuntime implementations for specific tools. Each runtime stays
small and focused and reuses the orchestrator for approvals + sandbox + retry.
*/
use crate::sandboxing::CommandSpec;
use crate::tools::sandboxing::ToolError;
use std::collections::HashMap;
use std::path::Path;
pub mod apply_patch;
pub mod shell;
pub mod unified_exec;
/// Shared helper to construct a CommandSpec from a tokenized command line.
/// Validates that at least a program is present.
pub(crate) fn build_command_spec(
command: &[String],
cwd: &Path,
env: &HashMap<String, String>,
timeout_ms: Option<u64>,
with_escalated_permissions: Option<bool>,
justification: Option<String>,
) -> Result<CommandSpec, ToolError> {
let (program, args) = command
.split_first()
.ok_or_else(|| ToolError::Rejected("command args are empty".to_string()))?;
Ok(CommandSpec {
program: program.clone(),
args: args.to_vec(),
cwd: cwd.to_path_buf(),
env: env.clone(),
timeout_ms,
with_escalated_permissions,
justification,
})
}

View File

@@ -0,0 +1,164 @@
/*
Runtime: shell
Executes shell requests under the orchestrator: asks for approval when needed,
builds a CommandSpec, and runs it under the current SandboxAttempt.
*/
use crate::command_safety::is_dangerous_command::command_might_be_dangerous;
use crate::command_safety::is_safe_command::is_known_safe_command;
use crate::exec::ExecToolCallOutput;
use crate::protocol::SandboxPolicy;
use crate::sandboxing::execute_env;
use crate::tools::runtimes::build_command_spec;
use crate::tools::sandboxing::Approvable;
use crate::tools::sandboxing::ApprovalCtx;
use crate::tools::sandboxing::SandboxAttempt;
use crate::tools::sandboxing::Sandboxable;
use crate::tools::sandboxing::SandboxablePreference;
use crate::tools::sandboxing::ToolCtx;
use crate::tools::sandboxing::ToolError;
use crate::tools::sandboxing::ToolRuntime;
use crate::tools::sandboxing::with_cached_approval;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::ReviewDecision;
use futures::future::BoxFuture;
use std::path::PathBuf;
#[derive(Clone, Debug)]
pub struct ShellRequest {
pub command: Vec<String>,
pub cwd: PathBuf,
pub timeout_ms: Option<u64>,
pub env: std::collections::HashMap<String, String>,
pub with_escalated_permissions: Option<bool>,
pub justification: Option<String>,
}
#[derive(Default)]
pub struct ShellRuntime;
#[derive(serde::Serialize, Clone, Debug, Eq, PartialEq, Hash)]
pub(crate) struct ApprovalKey {
command: Vec<String>,
cwd: PathBuf,
escalated: bool,
}
impl ShellRuntime {
pub fn new() -> Self {
Self
}
fn stdout_stream(ctx: &ToolCtx<'_>) -> Option<crate::exec::StdoutStream> {
Some(crate::exec::StdoutStream {
sub_id: ctx.sub_id.clone(),
call_id: ctx.call_id.clone(),
tx_event: ctx.session.get_tx_event(),
})
}
}
impl Sandboxable for ShellRuntime {
fn sandbox_preference(&self) -> SandboxablePreference {
SandboxablePreference::Auto
}
fn escalate_on_failure(&self) -> bool {
true
}
}
impl Approvable<ShellRequest> for ShellRuntime {
type ApprovalKey = ApprovalKey;
fn approval_key(&self, req: &ShellRequest) -> Self::ApprovalKey {
ApprovalKey {
command: req.command.clone(),
cwd: req.cwd.clone(),
escalated: req.with_escalated_permissions.unwrap_or(false),
}
}
fn start_approval_async<'a>(
&'a mut self,
req: &'a ShellRequest,
ctx: ApprovalCtx<'a>,
) -> BoxFuture<'a, ReviewDecision> {
let key = self.approval_key(req);
let command = req.command.clone();
let cwd = req.cwd.clone();
let reason = ctx
.retry_reason
.clone()
.or_else(|| req.justification.clone());
let session = ctx.session;
let sub_id = ctx.sub_id.to_string();
let call_id = ctx.call_id.to_string();
Box::pin(async move {
with_cached_approval(&session.services, key, || async move {
session
.request_command_approval(sub_id, call_id, command, cwd, reason)
.await
})
.await
})
}
fn wants_initial_approval(
&self,
req: &ShellRequest,
policy: AskForApproval,
sandbox_policy: &SandboxPolicy,
) -> bool {
if is_known_safe_command(&req.command) {
return false;
}
match policy {
AskForApproval::Never | AskForApproval::OnFailure => false,
AskForApproval::OnRequest => {
// In DangerFullAccess, only prompt if the command looks dangerous.
if matches!(sandbox_policy, SandboxPolicy::DangerFullAccess) {
return command_might_be_dangerous(&req.command);
}
// In restricted sandboxes (ReadOnly/WorkspaceWrite), do not prompt for
// nonescalated, nondangerous commands — let the sandbox enforce
// restrictions (e.g., block network/write) without a user prompt.
let wants_escalation = req.with_escalated_permissions.unwrap_or(false);
if wants_escalation {
return true;
}
command_might_be_dangerous(&req.command)
}
AskForApproval::UnlessTrusted => !is_known_safe_command(&req.command),
}
}
fn wants_escalated_first_attempt(&self, req: &ShellRequest) -> bool {
req.with_escalated_permissions.unwrap_or(false)
}
}
impl ToolRuntime<ShellRequest, ExecToolCallOutput> for ShellRuntime {
async fn run(
&mut self,
req: &ShellRequest,
attempt: &SandboxAttempt<'_>,
ctx: &ToolCtx<'_>,
) -> Result<ExecToolCallOutput, ToolError> {
let spec = build_command_spec(
&req.command,
&req.cwd,
&req.env,
req.timeout_ms,
req.with_escalated_permissions,
req.justification.clone(),
)?;
let env = attempt
.env_for(&spec)
.map_err(|err| ToolError::Codex(err.into()))?;
let out = execute_env(&env, attempt.policy, Self::stdout_stream(ctx))
.await
.map_err(ToolError::Codex)?;
Ok(out)
}
}

View File

@@ -0,0 +1,123 @@
/*
Runtime: unified exec
Handles approval + sandbox orchestration for unified exec requests, delegating to
the session manager to spawn PTYs once an ExecEnv is prepared.
*/
use crate::error::CodexErr;
use crate::error::SandboxErr;
use crate::tools::runtimes::build_command_spec;
use crate::tools::sandboxing::Approvable;
use crate::tools::sandboxing::ApprovalCtx;
use crate::tools::sandboxing::SandboxAttempt;
use crate::tools::sandboxing::Sandboxable;
use crate::tools::sandboxing::SandboxablePreference;
use crate::tools::sandboxing::ToolCtx;
use crate::tools::sandboxing::ToolError;
use crate::tools::sandboxing::ToolRuntime;
use crate::tools::sandboxing::with_cached_approval;
use crate::unified_exec::UnifiedExecError;
use crate::unified_exec::UnifiedExecSession;
use crate::unified_exec::UnifiedExecSessionManager;
use codex_protocol::protocol::ReviewDecision;
use futures::future::BoxFuture;
use std::collections::HashMap;
use std::path::PathBuf;
#[derive(Clone, Debug)]
pub struct UnifiedExecRequest {
pub command: Vec<String>,
pub cwd: PathBuf,
pub env: HashMap<String, String>,
}
#[derive(serde::Serialize, Clone, Debug, Eq, PartialEq, Hash)]
pub struct UnifiedExecApprovalKey {
pub command: Vec<String>,
pub cwd: PathBuf,
}
pub struct UnifiedExecRuntime<'a> {
manager: &'a UnifiedExecSessionManager,
}
impl UnifiedExecRequest {
pub fn new(command: Vec<String>, cwd: PathBuf, env: HashMap<String, String>) -> Self {
Self { command, cwd, env }
}
}
impl<'a> UnifiedExecRuntime<'a> {
pub fn new(manager: &'a UnifiedExecSessionManager) -> Self {
Self { manager }
}
}
impl Sandboxable for UnifiedExecRuntime<'_> {
fn sandbox_preference(&self) -> SandboxablePreference {
SandboxablePreference::Auto
}
fn escalate_on_failure(&self) -> bool {
true
}
}
impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
type ApprovalKey = UnifiedExecApprovalKey;
fn approval_key(&self, req: &UnifiedExecRequest) -> Self::ApprovalKey {
UnifiedExecApprovalKey {
command: req.command.clone(),
cwd: req.cwd.clone(),
}
}
fn start_approval_async<'b>(
&'b mut self,
req: &'b UnifiedExecRequest,
ctx: ApprovalCtx<'b>,
) -> BoxFuture<'b, ReviewDecision> {
let key = self.approval_key(req);
let session = ctx.session;
let sub_id = ctx.sub_id.to_string();
let call_id = ctx.call_id.to_string();
let command = req.command.clone();
let cwd = req.cwd.clone();
let reason = ctx.retry_reason.clone();
Box::pin(async move {
with_cached_approval(&session.services, key, || async move {
session
.request_command_approval(sub_id, call_id, command, cwd, reason)
.await
})
.await
})
}
}
impl<'a> ToolRuntime<UnifiedExecRequest, UnifiedExecSession> for UnifiedExecRuntime<'a> {
async fn run(
&mut self,
req: &UnifiedExecRequest,
attempt: &SandboxAttempt<'_>,
_ctx: &ToolCtx<'_>,
) -> Result<UnifiedExecSession, ToolError> {
let spec = build_command_spec(&req.command, &req.cwd, &req.env, None, None, None)
.map_err(|_| ToolError::Rejected("missing command line for PTY".to_string()))?;
let exec_env = attempt
.env_for(&spec)
.map_err(|err| ToolError::Codex(err.into()))?;
self.manager
.open_session_with_exec_env(&exec_env)
.await
.map_err(|err| match err {
UnifiedExecError::SandboxDenied { output, .. } => {
ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied {
output: Box::new(output),
}))
}
other => ToolError::Rejected(other.to_string()),
})
}
}

View File

@@ -0,0 +1,190 @@
//! Shared approvals and sandboxing traits used by tool runtimes.
//!
//! Consolidates the approval flow primitives (`ApprovalDecision`, `ApprovalStore`,
//! `ApprovalCtx`, `Approvable`) together with the sandbox orchestration traits
//! and helpers (`Sandboxable`, `ToolRuntime`, `SandboxAttempt`, etc.).
use crate::codex::Session;
use crate::error::CodexErr;
use crate::protocol::SandboxPolicy;
use crate::sandboxing::CommandSpec;
use crate::sandboxing::SandboxManager;
use crate::sandboxing::SandboxTransformError;
use crate::state::SessionServices;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::ReviewDecision;
use std::collections::HashMap;
use std::fmt::Debug;
use std::hash::Hash;
use std::path::Path;
use futures::Future;
use futures::future::BoxFuture;
use serde::Serialize;
#[derive(Clone, Default, Debug)]
pub(crate) struct ApprovalStore {
// Store serialized keys for generic caching across requests.
map: HashMap<String, ReviewDecision>,
}
impl ApprovalStore {
pub fn get<K>(&self, key: &K) -> Option<ReviewDecision>
where
K: Serialize,
{
let s = serde_json::to_string(key).ok()?;
self.map.get(&s).cloned()
}
pub fn put<K>(&mut self, key: K, value: ReviewDecision)
where
K: Serialize,
{
if let Ok(s) = serde_json::to_string(&key) {
self.map.insert(s, value);
}
}
}
pub(crate) async fn with_cached_approval<K, F, Fut>(
services: &SessionServices,
key: K,
fetch: F,
) -> ReviewDecision
where
K: Serialize + Clone,
F: FnOnce() -> Fut,
Fut: Future<Output = ReviewDecision>,
{
{
let store = services.tool_approvals.lock().await;
if let Some(decision) = store.get(&key) {
return decision;
}
}
let decision = fetch().await;
if matches!(decision, ReviewDecision::ApprovedForSession) {
let mut store = services.tool_approvals.lock().await;
store.put(key, ReviewDecision::ApprovedForSession);
}
decision
}
#[derive(Clone)]
pub(crate) struct ApprovalCtx<'a> {
pub session: &'a Session,
pub sub_id: &'a str,
pub call_id: &'a str,
pub retry_reason: Option<String>,
}
pub(crate) trait Approvable<Req> {
type ApprovalKey: Hash + Eq + Clone + Debug + Serialize;
fn approval_key(&self, req: &Req) -> Self::ApprovalKey;
/// Some tools may request to skip the sandbox on the first attempt
/// (e.g., when the request explicitly asks for escalated permissions).
/// Defaults to `false`.
fn wants_escalated_first_attempt(&self, _req: &Req) -> bool {
false
}
fn should_bypass_approval(&self, policy: AskForApproval, already_approved: bool) -> bool {
if already_approved {
// We do not ask one more time
return true;
}
matches!(policy, AskForApproval::Never)
}
/// Decide whether an initial user approval should be requested before the
/// first attempt. Defaults to the orchestrator's behavior (prerefactor):
/// - Never, OnFailure: do not ask
/// - OnRequest: ask unless sandbox policy is DangerFullAccess
/// - UnlessTrusted: always ask
fn wants_initial_approval(
&self,
_req: &Req,
policy: AskForApproval,
sandbox_policy: &SandboxPolicy,
) -> bool {
match policy {
AskForApproval::Never | AskForApproval::OnFailure => false,
AskForApproval::OnRequest => !matches!(sandbox_policy, SandboxPolicy::DangerFullAccess),
AskForApproval::UnlessTrusted => true,
}
}
fn start_approval_async<'a>(
&'a mut self,
req: &'a Req,
ctx: ApprovalCtx<'a>,
) -> BoxFuture<'a, ReviewDecision>;
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum SandboxablePreference {
Auto,
#[allow(dead_code)] // Will be used by later tools.
Require,
#[allow(dead_code)] // Will be used by later tools.
Forbid,
}
pub(crate) trait Sandboxable {
fn sandbox_preference(&self) -> SandboxablePreference;
fn escalate_on_failure(&self) -> bool {
true
}
}
pub(crate) struct ToolCtx<'a> {
pub session: &'a Session,
pub sub_id: String,
pub call_id: String,
pub tool_name: String,
}
#[derive(Debug)]
pub(crate) enum ToolError {
Rejected(String),
SandboxDenied(String),
Codex(CodexErr),
}
pub(crate) trait ToolRuntime<Req, Out>: Approvable<Req> + Sandboxable {
async fn run(
&mut self,
req: &Req,
attempt: &SandboxAttempt<'_>,
ctx: &ToolCtx,
) -> Result<Out, ToolError>;
}
pub(crate) struct SandboxAttempt<'a> {
pub sandbox: crate::exec::SandboxType,
pub policy: &'a crate::protocol::SandboxPolicy,
pub(crate) manager: &'a SandboxManager,
pub(crate) sandbox_cwd: &'a Path,
pub codex_linux_sandbox_exe: Option<&'a std::path::PathBuf>,
}
impl<'a> SandboxAttempt<'a> {
pub fn env_for(
&self,
spec: &CommandSpec,
) -> Result<crate::sandboxing::ExecEnv, SandboxTransformError> {
self.manager.transform(
spec,
self.policy,
self.sandbox,
self.sandbox_cwd,
self.codex_linux_sandbox_exe,
)
}
}

View File

@@ -815,12 +815,7 @@ pub(crate) fn build_specs(
config: &ToolsConfig,
mcp_tools: Option<HashMap<String, mcp_types::Tool>>,
) -> ToolRegistryBuilder {
use crate::exec_command::EXEC_COMMAND_TOOL_NAME;
use crate::exec_command::WRITE_STDIN_TOOL_NAME;
use crate::exec_command::create_exec_command_tool_for_responses_api;
use crate::exec_command::create_write_stdin_tool_for_responses_api;
use crate::tools::handlers::ApplyPatchHandler;
use crate::tools::handlers::ExecStreamHandler;
use crate::tools::handlers::GrepFilesHandler;
use crate::tools::handlers::ListDirHandler;
use crate::tools::handlers::McpHandler;
@@ -836,7 +831,6 @@ pub(crate) fn build_specs(
let mut builder = ToolRegistryBuilder::new();
let shell_handler = Arc::new(ShellHandler);
let exec_stream_handler = Arc::new(ExecStreamHandler);
let unified_exec_handler = Arc::new(UnifiedExecHandler);
let plan_handler = Arc::new(PlanHandler);
let apply_patch_handler = Arc::new(ApplyPatchHandler);
@@ -844,7 +838,10 @@ pub(crate) fn build_specs(
let mcp_handler = Arc::new(McpHandler);
let mcp_resource_handler = Arc::new(McpResourceHandler);
if config.experimental_unified_exec_tool {
let use_unified_exec = config.experimental_unified_exec_tool
|| matches!(config.shell_type, ConfigShellToolType::Streamable);
if use_unified_exec {
builder.push_spec(create_unified_exec_tool());
builder.register_handler("unified_exec", unified_exec_handler);
} else {
@@ -856,14 +853,7 @@ pub(crate) fn build_specs(
builder.push_spec(ToolSpec::LocalShell {});
}
ConfigShellToolType::Streamable => {
builder.push_spec(ToolSpec::Function(
create_exec_command_tool_for_responses_api(),
));
builder.push_spec(ToolSpec::Function(
create_write_stdin_tool_for_responses_api(),
));
builder.register_handler(EXEC_COMMAND_TOOL_NAME, exec_stream_handler.clone());
builder.register_handler(WRITE_STDIN_TOOL_NAME, exec_stream_handler);
// Already handled by use_unified_exec.
}
}
}

View File

@@ -1,22 +1,29 @@
use crate::exec::ExecToolCallOutput;
use thiserror::Error;
#[derive(Debug, Error)]
pub(crate) enum UnifiedExecError {
#[error("Failed to create unified exec session: {pty_error}")]
CreateSession {
#[source]
pty_error: anyhow::Error,
},
#[error("Failed to create unified exec session: {message}")]
CreateSession { message: String },
#[error("Unknown session id {session_id}")]
UnknownSessionId { session_id: i32 },
#[error("failed to write to stdin")]
WriteToStdin,
#[error("missing command line for unified exec request")]
MissingCommandLine,
#[error("Command denied by sandbox: {message}")]
SandboxDenied {
message: String,
output: ExecToolCallOutput,
},
}
impl UnifiedExecError {
pub(crate) fn create_session(error: anyhow::Error) -> Self {
Self::CreateSession { pty_error: error }
pub(crate) fn create_session(message: String) -> Self {
Self::CreateSession { message }
}
pub(crate) fn sandbox_denied(message: String, output: ExecToolCallOutput) -> Self {
Self::SandboxDenied { message, output }
}
}

View File

@@ -1,36 +1,55 @@
use portable_pty::CommandBuilder;
use portable_pty::PtySize;
use portable_pty::native_pty_system;
use std::collections::HashMap;
use std::collections::VecDeque;
use std::io::ErrorKind;
use std::io::Read;
use std::sync::Arc;
use std::sync::Mutex as StdMutex;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::AtomicI32;
use std::sync::atomic::Ordering;
use tokio::sync::Mutex;
use tokio::sync::Notify;
use tokio::sync::mpsc;
use tokio::task::JoinHandle;
use tokio::time::Duration;
use tokio::time::Instant;
//! Unified Exec: interactive PTY execution orchestrated with approvals + sandboxing.
//!
//! Responsibilities
//! - Manages interactive PTY sessions (create, reuse, buffer output with caps).
//! - Uses the shared ToolOrchestrator to handle approval, sandbox selection, and
//! retry semantics in a single, descriptive flow.
//! - Spawns the PTY from a sandboxtransformed `ExecEnv`; on sandbox denial,
//! retries without sandbox when policy allows (no reprompt thanks to caching).
//! - Uses the shared `is_likely_sandbox_denied` heuristic to keep denial messages
//! consistent with other exec paths.
//!
//! Flow at a glance (open session)
//! 1) Build a small request `{ command, cwd }`.
//! 2) Orchestrator: approval (bypass/cache/prompt) → select sandbox → run.
//! 3) Runtime: transform `CommandSpec` → `ExecEnv` → spawn PTY.
//! 4) If denial, orchestrator retries with `SandboxType::None`.
//! 5) Session is returned with streaming output + metadata.
//!
//! This keeps policy logic and user interaction centralized while the PTY/session
//! concerns remain isolated here. The implementation is split between:
//! - `session.rs`: PTY session lifecycle + output buffering.
//! - `session_manager.rs`: orchestration (approvals, sandboxing, reuse) and request handling.
use crate::exec_command::ExecCommandSession;
use crate::truncate::truncate_middle;
use std::collections::HashMap;
use std::sync::atomic::AtomicI32;
use tokio::sync::Mutex;
use crate::codex::Session;
use crate::codex::TurnContext;
mod errors;
mod session;
mod session_manager;
pub(crate) use errors::UnifiedExecError;
pub(crate) use session::UnifiedExecSession;
const DEFAULT_TIMEOUT_MS: u64 = 1_000;
const MAX_TIMEOUT_MS: u64 = 60_000;
const UNIFIED_EXEC_OUTPUT_MAX_BYTES: usize = 128 * 1024; // 128 KiB
pub(crate) struct UnifiedExecContext<'a> {
pub session: &'a Session,
pub turn: &'a TurnContext,
pub sub_id: &'a str,
pub call_id: &'a str,
pub session_id: Option<i32>,
}
#[derive(Debug)]
pub(crate) struct UnifiedExecRequest<'a> {
pub session_id: Option<i32>,
pub input_chunks: &'a [String],
pub timeout_ms: Option<u64>,
}
@@ -44,379 +63,60 @@ pub(crate) struct UnifiedExecResult {
#[derive(Debug, Default)]
pub(crate) struct UnifiedExecSessionManager {
next_session_id: AtomicI32,
sessions: Mutex<HashMap<i32, ManagedUnifiedExecSession>>,
}
#[derive(Debug)]
struct ManagedUnifiedExecSession {
session: ExecCommandSession,
output_buffer: OutputBuffer,
/// Notifies waiters whenever new output has been appended to
/// `output_buffer`, allowing clients to poll for fresh data.
output_notify: Arc<Notify>,
output_task: JoinHandle<()>,
}
#[derive(Debug, Default)]
struct OutputBufferState {
chunks: VecDeque<Vec<u8>>,
total_bytes: usize,
}
impl OutputBufferState {
fn push_chunk(&mut self, chunk: Vec<u8>) {
self.total_bytes = self.total_bytes.saturating_add(chunk.len());
self.chunks.push_back(chunk);
let mut excess = self
.total_bytes
.saturating_sub(UNIFIED_EXEC_OUTPUT_MAX_BYTES);
while excess > 0 {
match self.chunks.front_mut() {
Some(front) if excess >= front.len() => {
excess -= front.len();
self.total_bytes = self.total_bytes.saturating_sub(front.len());
self.chunks.pop_front();
}
Some(front) => {
front.drain(..excess);
self.total_bytes = self.total_bytes.saturating_sub(excess);
break;
}
None => break,
}
}
}
fn drain(&mut self) -> Vec<Vec<u8>> {
let drained: Vec<Vec<u8>> = self.chunks.drain(..).collect();
self.total_bytes = 0;
drained
}
}
type OutputBuffer = Arc<Mutex<OutputBufferState>>;
type OutputHandles = (OutputBuffer, Arc<Notify>);
impl ManagedUnifiedExecSession {
fn new(
session: ExecCommandSession,
initial_output_rx: tokio::sync::broadcast::Receiver<Vec<u8>>,
) -> Self {
let output_buffer = Arc::new(Mutex::new(OutputBufferState::default()));
let output_notify = Arc::new(Notify::new());
let mut receiver = initial_output_rx;
let buffer_clone = Arc::clone(&output_buffer);
let notify_clone = Arc::clone(&output_notify);
let output_task = tokio::spawn(async move {
loop {
match receiver.recv().await {
Ok(chunk) => {
let mut guard = buffer_clone.lock().await;
guard.push_chunk(chunk);
drop(guard);
notify_clone.notify_waiters();
}
// If we lag behind the broadcast buffer, skip missed
// messages but keep the task alive to continue streaming.
Err(tokio::sync::broadcast::error::RecvError::Lagged(_)) => {
continue;
}
// When the sender closes, exit the task.
Err(tokio::sync::broadcast::error::RecvError::Closed) => break,
}
}
});
Self {
session,
output_buffer,
output_notify,
output_task,
}
}
fn writer_sender(&self) -> mpsc::Sender<Vec<u8>> {
self.session.writer_sender()
}
fn output_handles(&self) -> OutputHandles {
(
Arc::clone(&self.output_buffer),
Arc::clone(&self.output_notify),
)
}
fn has_exited(&self) -> bool {
self.session.has_exited()
}
}
impl Drop for ManagedUnifiedExecSession {
fn drop(&mut self) {
self.output_task.abort();
}
}
impl UnifiedExecSessionManager {
pub async fn handle_request(
&self,
request: UnifiedExecRequest<'_>,
) -> Result<UnifiedExecResult, UnifiedExecError> {
let (timeout_ms, timeout_warning) = match request.timeout_ms {
Some(requested) if requested > MAX_TIMEOUT_MS => (
MAX_TIMEOUT_MS,
Some(format!(
"Warning: requested timeout {requested}ms exceeds maximum of {MAX_TIMEOUT_MS}ms; clamping to {MAX_TIMEOUT_MS}ms.\n"
)),
),
Some(requested) => (requested, None),
None => (DEFAULT_TIMEOUT_MS, None),
};
let mut new_session: Option<ManagedUnifiedExecSession> = None;
let session_id;
let writer_tx;
let output_buffer;
let output_notify;
if let Some(existing_id) = request.session_id {
let mut sessions = self.sessions.lock().await;
match sessions.get(&existing_id) {
Some(session) => {
if session.has_exited() {
sessions.remove(&existing_id);
return Err(UnifiedExecError::UnknownSessionId {
session_id: existing_id,
});
}
let (buffer, notify) = session.output_handles();
session_id = existing_id;
writer_tx = session.writer_sender();
output_buffer = buffer;
output_notify = notify;
}
None => {
return Err(UnifiedExecError::UnknownSessionId {
session_id: existing_id,
});
}
}
drop(sessions);
} else {
let command = request.input_chunks.to_vec();
let new_id = self.next_session_id.fetch_add(1, Ordering::SeqCst);
let (session, initial_output_rx) = create_unified_exec_session(&command).await?;
let managed_session = ManagedUnifiedExecSession::new(session, initial_output_rx);
let (buffer, notify) = managed_session.output_handles();
writer_tx = managed_session.writer_sender();
output_buffer = buffer;
output_notify = notify;
session_id = new_id;
new_session = Some(managed_session);
};
if request.session_id.is_some() {
let joined_input = request.input_chunks.join(" ");
if !joined_input.is_empty() && writer_tx.send(joined_input.into_bytes()).await.is_err()
{
return Err(UnifiedExecError::WriteToStdin);
}
}
let mut collected: Vec<u8> = Vec::with_capacity(4096);
let start = Instant::now();
let deadline = start + Duration::from_millis(timeout_ms);
loop {
let drained_chunks;
let mut wait_for_output = None;
{
let mut guard = output_buffer.lock().await;
drained_chunks = guard.drain();
if drained_chunks.is_empty() {
wait_for_output = Some(output_notify.notified());
}
}
if drained_chunks.is_empty() {
let remaining = deadline.saturating_duration_since(Instant::now());
if remaining == Duration::ZERO {
break;
}
let notified = wait_for_output.unwrap_or_else(|| output_notify.notified());
tokio::pin!(notified);
tokio::select! {
_ = &mut notified => {}
_ = tokio::time::sleep(remaining) => break,
}
continue;
}
for chunk in drained_chunks {
collected.extend_from_slice(&chunk);
}
if Instant::now() >= deadline {
break;
}
}
let (output, _maybe_tokens) = truncate_middle(
&String::from_utf8_lossy(&collected),
UNIFIED_EXEC_OUTPUT_MAX_BYTES,
);
let output = if let Some(warning) = timeout_warning {
format!("{warning}{output}")
} else {
output
};
let should_store_session = if let Some(session) = new_session.as_ref() {
!session.has_exited()
} else if request.session_id.is_some() {
let mut sessions = self.sessions.lock().await;
if let Some(existing) = sessions.get(&session_id) {
if existing.has_exited() {
sessions.remove(&session_id);
false
} else {
true
}
} else {
false
}
} else {
true
};
if should_store_session {
if let Some(session) = new_session {
self.sessions.lock().await.insert(session_id, session);
}
Ok(UnifiedExecResult {
session_id: Some(session_id),
output,
})
} else {
Ok(UnifiedExecResult {
session_id: None,
output,
})
}
}
}
async fn create_unified_exec_session(
command: &[String],
) -> Result<
(
ExecCommandSession,
tokio::sync::broadcast::Receiver<Vec<u8>>,
),
UnifiedExecError,
> {
if command.is_empty() {
return Err(UnifiedExecError::MissingCommandLine);
}
let pty_system = native_pty_system();
let pair = pty_system
.openpty(PtySize {
rows: 24,
cols: 80,
pixel_width: 0,
pixel_height: 0,
})
.map_err(UnifiedExecError::create_session)?;
// Safe thanks to the check at the top of the function.
let mut command_builder = CommandBuilder::new(command[0].clone());
for arg in &command[1..] {
command_builder.arg(arg);
}
let mut child = pair
.slave
.spawn_command(command_builder)
.map_err(UnifiedExecError::create_session)?;
let killer = child.clone_killer();
let (writer_tx, mut writer_rx) = mpsc::channel::<Vec<u8>>(128);
let (output_tx, _) = tokio::sync::broadcast::channel::<Vec<u8>>(256);
let mut reader = pair
.master
.try_clone_reader()
.map_err(UnifiedExecError::create_session)?;
let output_tx_clone = output_tx.clone();
let reader_handle = tokio::task::spawn_blocking(move || {
let mut buf = [0u8; 8192];
loop {
match reader.read(&mut buf) {
Ok(0) => break,
Ok(n) => {
let _ = output_tx_clone.send(buf[..n].to_vec());
}
Err(ref e) if e.kind() == ErrorKind::Interrupted => continue,
Err(ref e) if e.kind() == ErrorKind::WouldBlock => {
std::thread::sleep(Duration::from_millis(5));
continue;
}
Err(_) => break,
}
}
});
let writer = pair
.master
.take_writer()
.map_err(UnifiedExecError::create_session)?;
let writer = Arc::new(StdMutex::new(writer));
let writer_handle = tokio::spawn({
let writer = writer.clone();
async move {
while let Some(bytes) = writer_rx.recv().await {
let writer = writer.clone();
let _ = tokio::task::spawn_blocking(move || {
if let Ok(mut guard) = writer.lock() {
use std::io::Write;
let _ = guard.write_all(&bytes);
let _ = guard.flush();
}
})
.await;
}
}
});
let exit_status = Arc::new(AtomicBool::new(false));
let wait_exit_status = Arc::clone(&exit_status);
let wait_handle = tokio::task::spawn_blocking(move || {
let _ = child.wait();
wait_exit_status.store(true, Ordering::SeqCst);
});
let (session, initial_output_rx) = ExecCommandSession::new(
writer_tx,
output_tx,
killer,
reader_handle,
writer_handle,
wait_handle,
exit_status,
);
Ok((session, initial_output_rx))
sessions: Mutex<HashMap<i32, session::UnifiedExecSession>>,
}
#[cfg(test)]
#[cfg(unix)]
mod tests {
use super::*;
#[cfg(unix)]
use crate::codex::Session;
use crate::codex::TurnContext;
use crate::codex::make_session_and_context;
use crate::protocol::AskForApproval;
use crate::protocol::SandboxPolicy;
use core_test_support::skip_if_sandbox;
use std::sync::Arc;
use tokio::time::Duration;
use super::session::OutputBufferState;
fn test_session_and_turn() -> (Arc<Session>, Arc<TurnContext>) {
let (session, mut turn) = make_session_and_context();
turn.approval_policy = AskForApproval::Never;
turn.sandbox_policy = SandboxPolicy::DangerFullAccess;
(Arc::new(session), Arc::new(turn))
}
async fn run_unified_exec_request(
session: &Arc<Session>,
turn: &Arc<TurnContext>,
session_id: Option<i32>,
input: Vec<String>,
timeout_ms: Option<u64>,
) -> Result<UnifiedExecResult, UnifiedExecError> {
let request_input = input;
let request = UnifiedExecRequest {
input_chunks: &request_input,
timeout_ms,
};
session
.services
.unified_exec_manager
.handle_request(
request,
UnifiedExecContext {
session,
turn: turn.as_ref(),
sub_id: "sub",
call_id: "call",
session_id,
},
)
.await
}
#[test]
fn push_chunk_trims_only_excess_bytes() {
@@ -426,167 +126,170 @@ mod tests {
buffer.push_chunk(vec![b'c']);
assert_eq!(buffer.total_bytes, UNIFIED_EXEC_OUTPUT_MAX_BYTES);
assert_eq!(buffer.chunks.len(), 3);
let snapshot = buffer.snapshot();
assert_eq!(snapshot.len(), 3);
assert_eq!(
buffer.chunks.front().unwrap().len(),
snapshot.first().unwrap().len(),
UNIFIED_EXEC_OUTPUT_MAX_BYTES - 2
);
assert_eq!(buffer.chunks.pop_back().unwrap(), vec![b'c']);
assert_eq!(buffer.chunks.pop_back().unwrap(), vec![b'b']);
assert_eq!(snapshot.get(2).unwrap(), &vec![b'c']);
assert_eq!(snapshot.get(1).unwrap(), &vec![b'b']);
}
#[cfg(unix)]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn unified_exec_persists_across_requests_jif() -> Result<(), UnifiedExecError> {
async fn unified_exec_persists_across_requests() -> anyhow::Result<()> {
skip_if_sandbox!(Ok(()));
let manager = UnifiedExecSessionManager::default();
let (session, turn) = test_session_and_turn();
let open_shell = manager
.handle_request(UnifiedExecRequest {
session_id: None,
input_chunks: &["bash".to_string(), "-i".to_string()],
timeout_ms: Some(2_500),
})
.await?;
let open_shell = run_unified_exec_request(
&session,
&turn,
None,
vec!["bash".to_string(), "-i".to_string()],
Some(2_500),
)
.await?;
let session_id = open_shell.session_id.expect("expected session_id");
manager
.handle_request(UnifiedExecRequest {
session_id: Some(session_id),
input_chunks: &[
"export".to_string(),
"CODEX_INTERACTIVE_SHELL_VAR=codex\n".to_string(),
],
timeout_ms: Some(2_500),
})
.await?;
run_unified_exec_request(
&session,
&turn,
Some(session_id),
vec![
"export".to_string(),
"CODEX_INTERACTIVE_SHELL_VAR=codex\n".to_string(),
],
Some(2_500),
)
.await?;
let out_2 = manager
.handle_request(UnifiedExecRequest {
session_id: Some(session_id),
input_chunks: &["echo $CODEX_INTERACTIVE_SHELL_VAR\n".to_string()],
timeout_ms: Some(2_500),
})
.await?;
let out_2 = run_unified_exec_request(
&session,
&turn,
Some(session_id),
vec!["echo $CODEX_INTERACTIVE_SHELL_VAR\n".to_string()],
Some(2_500),
)
.await?;
assert!(out_2.output.contains("codex"));
Ok(())
}
#[cfg(unix)]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn multi_unified_exec_sessions() -> Result<(), UnifiedExecError> {
async fn multi_unified_exec_sessions() -> anyhow::Result<()> {
skip_if_sandbox!(Ok(()));
let manager = UnifiedExecSessionManager::default();
let (session, turn) = test_session_and_turn();
let shell_a = manager
.handle_request(UnifiedExecRequest {
session_id: None,
input_chunks: &["/bin/bash".to_string(), "-i".to_string()],
timeout_ms: Some(2_500),
})
.await?;
let shell_a = run_unified_exec_request(
&session,
&turn,
None,
vec!["/bin/bash".to_string(), "-i".to_string()],
Some(2_500),
)
.await?;
let session_a = shell_a.session_id.expect("expected session id");
manager
.handle_request(UnifiedExecRequest {
session_id: Some(session_a),
input_chunks: &["export CODEX_INTERACTIVE_SHELL_VAR=codex\n".to_string()],
timeout_ms: Some(2_500),
})
.await?;
run_unified_exec_request(
&session,
&turn,
Some(session_a),
vec!["export CODEX_INTERACTIVE_SHELL_VAR=codex\n".to_string()],
Some(2_500),
)
.await?;
let out_2 = manager
.handle_request(UnifiedExecRequest {
session_id: None,
input_chunks: &[
"echo".to_string(),
"$CODEX_INTERACTIVE_SHELL_VAR\n".to_string(),
],
timeout_ms: Some(2_500),
})
.await?;
let out_2 = run_unified_exec_request(
&session,
&turn,
None,
vec![
"echo".to_string(),
"$CODEX_INTERACTIVE_SHELL_VAR\n".to_string(),
],
Some(2_500),
)
.await?;
assert!(!out_2.output.contains("codex"));
let out_3 = manager
.handle_request(UnifiedExecRequest {
session_id: Some(session_a),
input_chunks: &["echo $CODEX_INTERACTIVE_SHELL_VAR\n".to_string()],
timeout_ms: Some(2_500),
})
.await?;
let out_3 = run_unified_exec_request(
&session,
&turn,
Some(session_a),
vec!["echo $CODEX_INTERACTIVE_SHELL_VAR\n".to_string()],
Some(2_500),
)
.await?;
assert!(out_3.output.contains("codex"));
Ok(())
}
#[cfg(unix)]
#[tokio::test]
async fn unified_exec_timeouts() -> Result<(), UnifiedExecError> {
async fn unified_exec_timeouts() -> anyhow::Result<()> {
skip_if_sandbox!(Ok(()));
let manager = UnifiedExecSessionManager::default();
let (session, turn) = test_session_and_turn();
let open_shell = manager
.handle_request(UnifiedExecRequest {
session_id: None,
input_chunks: &["bash".to_string(), "-i".to_string()],
timeout_ms: Some(2_500),
})
.await?;
let open_shell = run_unified_exec_request(
&session,
&turn,
None,
vec!["bash".to_string(), "-i".to_string()],
Some(2_500),
)
.await?;
let session_id = open_shell.session_id.expect("expected session id");
manager
.handle_request(UnifiedExecRequest {
session_id: Some(session_id),
input_chunks: &[
"export".to_string(),
"CODEX_INTERACTIVE_SHELL_VAR=codex\n".to_string(),
],
timeout_ms: Some(2_500),
})
.await?;
run_unified_exec_request(
&session,
&turn,
Some(session_id),
vec![
"export".to_string(),
"CODEX_INTERACTIVE_SHELL_VAR=codex\n".to_string(),
],
Some(2_500),
)
.await?;
let out_2 = manager
.handle_request(UnifiedExecRequest {
session_id: Some(session_id),
input_chunks: &["sleep 5 && echo $CODEX_INTERACTIVE_SHELL_VAR\n".to_string()],
timeout_ms: Some(10),
})
.await?;
let out_2 = run_unified_exec_request(
&session,
&turn,
Some(session_id),
vec!["sleep 5 && echo $CODEX_INTERACTIVE_SHELL_VAR\n".to_string()],
Some(10),
)
.await?;
assert!(!out_2.output.contains("codex"));
tokio::time::sleep(Duration::from_secs(7)).await;
let empty = Vec::new();
let out_3 = manager
.handle_request(UnifiedExecRequest {
session_id: Some(session_id),
input_chunks: &empty,
timeout_ms: Some(100),
})
.await?;
let out_3 =
run_unified_exec_request(&session, &turn, Some(session_id), Vec::new(), Some(100))
.await?;
assert!(out_3.output.contains("codex"));
Ok(())
}
#[cfg(unix)]
#[tokio::test]
#[ignore] // Ignored while we have a better way to test this.
async fn requests_with_large_timeout_are_capped() -> Result<(), UnifiedExecError> {
let manager = UnifiedExecSessionManager::default();
async fn requests_with_large_timeout_are_capped() -> anyhow::Result<()> {
let (session, turn) = test_session_and_turn();
let result = manager
.handle_request(UnifiedExecRequest {
session_id: None,
input_chunks: &["echo".to_string(), "codex".to_string()],
timeout_ms: Some(120_000),
})
.await?;
let result = run_unified_exec_request(
&session,
&turn,
None,
vec!["echo".to_string(), "codex".to_string()],
Some(120_000),
)
.await?;
assert!(result.output.starts_with(
"Warning: requested timeout 120000ms exceeds maximum of 60000ms; clamping to 60000ms.\n"
@@ -596,61 +299,66 @@ mod tests {
Ok(())
}
#[cfg(unix)]
#[tokio::test]
#[ignore] // Ignored while we have a better way to test this.
async fn completed_commands_do_not_persist_sessions() -> Result<(), UnifiedExecError> {
let manager = UnifiedExecSessionManager::default();
let result = manager
.handle_request(UnifiedExecRequest {
session_id: None,
input_chunks: &["/bin/echo".to_string(), "codex".to_string()],
timeout_ms: Some(2_500),
})
.await?;
async fn completed_commands_do_not_persist_sessions() -> anyhow::Result<()> {
let (session, turn) = test_session_and_turn();
let result = run_unified_exec_request(
&session,
&turn,
None,
vec!["/bin/echo".to_string(), "codex".to_string()],
Some(2_500),
)
.await?;
assert!(result.session_id.is_none());
assert!(result.output.contains("codex"));
assert!(manager.sessions.lock().await.is_empty());
assert!(
session
.services
.unified_exec_manager
.sessions
.lock()
.await
.is_empty()
);
Ok(())
}
#[cfg(unix)]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn reusing_completed_session_returns_unknown_session() -> Result<(), UnifiedExecError> {
async fn reusing_completed_session_returns_unknown_session() -> anyhow::Result<()> {
skip_if_sandbox!(Ok(()));
let manager = UnifiedExecSessionManager::default();
let (session, turn) = test_session_and_turn();
let open_shell = manager
.handle_request(UnifiedExecRequest {
session_id: None,
input_chunks: &["/bin/bash".to_string(), "-i".to_string()],
timeout_ms: Some(2_500),
})
.await?;
let open_shell = run_unified_exec_request(
&session,
&turn,
None,
vec!["/bin/bash".to_string(), "-i".to_string()],
Some(2_500),
)
.await?;
let session_id = open_shell.session_id.expect("expected session id");
manager
.handle_request(UnifiedExecRequest {
session_id: Some(session_id),
input_chunks: &["exit\n".to_string()],
timeout_ms: Some(2_500),
})
.await?;
run_unified_exec_request(
&session,
&turn,
Some(session_id),
vec!["exit\n".to_string()],
Some(2_500),
)
.await?;
tokio::time::sleep(Duration::from_millis(200)).await;
let err = manager
.handle_request(UnifiedExecRequest {
session_id: Some(session_id),
input_chunks: &[],
timeout_ms: Some(100),
})
.await
.expect_err("expected unknown session error");
let err =
run_unified_exec_request(&session, &turn, Some(session_id), Vec::new(), Some(100))
.await
.expect_err("expected unknown session error");
match err {
UnifiedExecError::UnknownSessionId { session_id: err_id } => {
@@ -659,7 +367,15 @@ mod tests {
other => panic!("expected UnknownSessionId, got {other:?}"),
}
assert!(!manager.sessions.lock().await.contains_key(&session_id));
assert!(
!session
.services
.unified_exec_manager
.sessions
.lock()
.await
.contains_key(&session_id)
);
Ok(())
}

View File

@@ -0,0 +1,217 @@
#![allow(clippy::module_inception)]
use std::collections::VecDeque;
use std::sync::Arc;
use tokio::sync::Mutex;
use tokio::sync::Notify;
use tokio::sync::mpsc;
use tokio::sync::oneshot::error::TryRecvError;
use tokio::task::JoinHandle;
use tokio::time::Duration;
use crate::exec::ExecToolCallOutput;
use crate::exec::SandboxType;
use crate::exec::StreamOutput;
use crate::exec::is_likely_sandbox_denied;
use crate::truncate::truncate_middle;
use codex_utils_pty::ExecCommandSession;
use codex_utils_pty::SpawnedPty;
use super::UNIFIED_EXEC_OUTPUT_MAX_BYTES;
use super::UnifiedExecError;
#[derive(Debug, Default)]
pub(crate) struct OutputBufferState {
chunks: VecDeque<Vec<u8>>,
pub(crate) total_bytes: usize,
}
impl OutputBufferState {
pub(super) fn push_chunk(&mut self, chunk: Vec<u8>) {
self.total_bytes = self.total_bytes.saturating_add(chunk.len());
self.chunks.push_back(chunk);
let mut excess = self
.total_bytes
.saturating_sub(UNIFIED_EXEC_OUTPUT_MAX_BYTES);
while excess > 0 {
match self.chunks.front_mut() {
Some(front) if excess >= front.len() => {
excess -= front.len();
self.total_bytes = self.total_bytes.saturating_sub(front.len());
self.chunks.pop_front();
}
Some(front) => {
front.drain(..excess);
self.total_bytes = self.total_bytes.saturating_sub(excess);
break;
}
None => break,
}
}
}
pub(super) fn drain(&mut self) -> Vec<Vec<u8>> {
let drained: Vec<Vec<u8>> = self.chunks.drain(..).collect();
self.total_bytes = 0;
drained
}
pub(super) fn snapshot(&self) -> Vec<Vec<u8>> {
self.chunks.iter().cloned().collect()
}
}
pub(crate) type OutputBuffer = Arc<Mutex<OutputBufferState>>;
pub(crate) type OutputHandles = (OutputBuffer, Arc<Notify>);
#[derive(Debug)]
pub(crate) struct UnifiedExecSession {
session: ExecCommandSession,
output_buffer: OutputBuffer,
output_notify: Arc<Notify>,
output_task: JoinHandle<()>,
sandbox_type: SandboxType,
}
impl UnifiedExecSession {
pub(super) fn new(
session: ExecCommandSession,
initial_output_rx: tokio::sync::broadcast::Receiver<Vec<u8>>,
sandbox_type: SandboxType,
) -> Self {
let output_buffer = Arc::new(Mutex::new(OutputBufferState::default()));
let output_notify = Arc::new(Notify::new());
let mut receiver = initial_output_rx;
let buffer_clone = Arc::clone(&output_buffer);
let notify_clone = Arc::clone(&output_notify);
let output_task = tokio::spawn(async move {
loop {
match receiver.recv().await {
Ok(chunk) => {
let mut guard = buffer_clone.lock().await;
guard.push_chunk(chunk);
drop(guard);
notify_clone.notify_waiters();
}
Err(tokio::sync::broadcast::error::RecvError::Lagged(_)) => continue,
Err(tokio::sync::broadcast::error::RecvError::Closed) => break,
}
}
});
Self {
session,
output_buffer,
output_notify,
output_task,
sandbox_type,
}
}
pub(super) fn writer_sender(&self) -> mpsc::Sender<Vec<u8>> {
self.session.writer_sender()
}
pub(super) fn output_handles(&self) -> OutputHandles {
(
Arc::clone(&self.output_buffer),
Arc::clone(&self.output_notify),
)
}
pub(super) fn has_exited(&self) -> bool {
self.session.has_exited()
}
pub(super) fn exit_code(&self) -> Option<i32> {
self.session.exit_code()
}
async fn snapshot_output(&self) -> Vec<Vec<u8>> {
let guard = self.output_buffer.lock().await;
guard.snapshot()
}
fn sandbox_type(&self) -> SandboxType {
self.sandbox_type
}
pub(super) async fn check_for_sandbox_denial(&self) -> Result<(), UnifiedExecError> {
if self.sandbox_type() == SandboxType::None || !self.has_exited() {
return Ok(());
}
let _ =
tokio::time::timeout(Duration::from_millis(20), self.output_notify.notified()).await;
let collected_chunks = self.snapshot_output().await;
let mut aggregated: Vec<u8> = Vec::new();
for chunk in collected_chunks {
aggregated.extend_from_slice(&chunk);
}
let aggregated_text = String::from_utf8_lossy(&aggregated).to_string();
let exit_code = self.exit_code().unwrap_or(-1);
let exec_output = ExecToolCallOutput {
exit_code,
stdout: StreamOutput::new(aggregated_text.clone()),
stderr: StreamOutput::new(String::new()),
aggregated_output: StreamOutput::new(aggregated_text.clone()),
duration: Duration::ZERO,
timed_out: false,
};
if is_likely_sandbox_denied(self.sandbox_type(), &exec_output) {
let (snippet, _) = truncate_middle(&aggregated_text, UNIFIED_EXEC_OUTPUT_MAX_BYTES);
let message = if snippet.is_empty() {
format!("exit code {exit_code}")
} else {
snippet
};
return Err(UnifiedExecError::sandbox_denied(message, exec_output));
}
Ok(())
}
pub(super) async fn from_spawned(
spawned: SpawnedPty,
sandbox_type: SandboxType,
) -> Result<Self, UnifiedExecError> {
let SpawnedPty {
session,
output_rx,
mut exit_rx,
} = spawned;
let managed = Self::new(session, output_rx, sandbox_type);
let exit_ready = match exit_rx.try_recv() {
Ok(_) | Err(TryRecvError::Closed) => true,
Err(TryRecvError::Empty) => false,
};
if exit_ready {
managed.check_for_sandbox_denial().await?;
return Ok(managed);
}
tokio::pin!(exit_rx);
if tokio::time::timeout(Duration::from_millis(50), &mut exit_rx)
.await
.is_ok()
{
managed.check_for_sandbox_denial().await?;
}
Ok(managed)
}
}
impl Drop for UnifiedExecSession {
fn drop(&mut self) {
self.output_task.abort();
}
}

View File

@@ -0,0 +1,288 @@
use std::sync::Arc;
use tokio::sync::Notify;
use tokio::sync::mpsc;
use tokio::time::Duration;
use tokio::time::Instant;
use crate::exec_env::create_env;
use crate::sandboxing::ExecEnv;
use crate::tools::orchestrator::ToolOrchestrator;
use crate::tools::runtimes::unified_exec::UnifiedExecRequest as UnifiedExecToolRequest;
use crate::tools::runtimes::unified_exec::UnifiedExecRuntime;
use crate::tools::sandboxing::ToolCtx;
use crate::truncate::truncate_middle;
use super::DEFAULT_TIMEOUT_MS;
use super::MAX_TIMEOUT_MS;
use super::UNIFIED_EXEC_OUTPUT_MAX_BYTES;
use super::UnifiedExecContext;
use super::UnifiedExecError;
use super::UnifiedExecRequest;
use super::UnifiedExecResult;
use super::UnifiedExecSessionManager;
use super::session::OutputBuffer;
use super::session::UnifiedExecSession;
pub(super) struct SessionAcquisition {
pub(super) session_id: i32,
pub(super) writer_tx: mpsc::Sender<Vec<u8>>,
pub(super) output_buffer: OutputBuffer,
pub(super) output_notify: Arc<Notify>,
pub(super) new_session: Option<UnifiedExecSession>,
pub(super) reuse_requested: bool,
}
impl UnifiedExecSessionManager {
pub(super) async fn acquire_session(
&self,
request: &UnifiedExecRequest<'_>,
context: &UnifiedExecContext<'_>,
) -> Result<SessionAcquisition, UnifiedExecError> {
if let Some(existing_id) = context.session_id {
let mut sessions = self.sessions.lock().await;
match sessions.get(&existing_id) {
Some(session) => {
if session.has_exited() {
sessions.remove(&existing_id);
return Err(UnifiedExecError::UnknownSessionId {
session_id: existing_id,
});
}
let (buffer, notify) = session.output_handles();
let writer_tx = session.writer_sender();
Ok(SessionAcquisition {
session_id: existing_id,
writer_tx,
output_buffer: buffer,
output_notify: notify,
new_session: None,
reuse_requested: true,
})
}
None => Err(UnifiedExecError::UnknownSessionId {
session_id: existing_id,
}),
}
} else {
let new_id = self
.next_session_id
.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
let managed_session = self
.open_session_with_sandbox(request.input_chunks.to_vec(), context)
.await?;
let (buffer, notify) = managed_session.output_handles();
let writer_tx = managed_session.writer_sender();
Ok(SessionAcquisition {
session_id: new_id,
writer_tx,
output_buffer: buffer,
output_notify: notify,
new_session: Some(managed_session),
reuse_requested: false,
})
}
}
pub(crate) async fn open_session_with_exec_env(
&self,
env: &ExecEnv,
) -> Result<UnifiedExecSession, UnifiedExecError> {
let (program, args) = env
.command
.split_first()
.ok_or(UnifiedExecError::MissingCommandLine)?;
let spawned =
codex_utils_pty::spawn_pty_process(program, args, env.cwd.as_path(), &env.env)
.await
.map_err(|err| UnifiedExecError::create_session(err.to_string()))?;
UnifiedExecSession::from_spawned(spawned, env.sandbox).await
}
pub(super) async fn open_session_with_sandbox(
&self,
command: Vec<String>,
context: &UnifiedExecContext<'_>,
) -> Result<UnifiedExecSession, UnifiedExecError> {
let mut orchestrator = ToolOrchestrator::new();
let mut runtime = UnifiedExecRuntime::new(self);
let req = UnifiedExecToolRequest::new(
command,
context.turn.cwd.clone(),
create_env(&context.turn.shell_environment_policy),
);
let tool_ctx = ToolCtx {
session: context.session,
sub_id: context.sub_id.to_string(),
call_id: context.call_id.to_string(),
tool_name: "unified_exec".to_string(),
};
orchestrator
.run(
&mut runtime,
&req,
&tool_ctx,
context.turn,
context.turn.approval_policy,
)
.await
.map_err(|e| UnifiedExecError::create_session(format!("{e:?}")))
}
pub(super) async fn collect_output_until_deadline(
output_buffer: &OutputBuffer,
output_notify: &Arc<Notify>,
deadline: Instant,
) -> Vec<u8> {
let mut collected: Vec<u8> = Vec::with_capacity(4096);
loop {
let drained_chunks;
let mut wait_for_output = None;
{
let mut guard = output_buffer.lock().await;
drained_chunks = guard.drain();
if drained_chunks.is_empty() {
wait_for_output = Some(output_notify.notified());
}
}
if drained_chunks.is_empty() {
let remaining = deadline.saturating_duration_since(Instant::now());
if remaining == Duration::ZERO {
break;
}
let notified = wait_for_output.unwrap_or_else(|| output_notify.notified());
tokio::pin!(notified);
tokio::select! {
_ = &mut notified => {}
_ = tokio::time::sleep(remaining) => break,
}
continue;
}
for chunk in drained_chunks {
collected.extend_from_slice(&chunk);
}
if Instant::now() >= deadline {
break;
}
}
collected
}
pub(super) async fn should_store_session(&self, acquisition: &SessionAcquisition) -> bool {
if let Some(session) = acquisition.new_session.as_ref() {
!session.has_exited()
} else if acquisition.reuse_requested {
let mut sessions = self.sessions.lock().await;
if let Some(existing) = sessions.get(&acquisition.session_id) {
if existing.has_exited() {
sessions.remove(&acquisition.session_id);
false
} else {
true
}
} else {
false
}
} else {
true
}
}
pub(super) async fn send_input_chunks(
writer_tx: &mpsc::Sender<Vec<u8>>,
chunks: &[String],
) -> Result<(), UnifiedExecError> {
let mut trailing_whitespace = true;
for chunk in chunks {
if chunk.is_empty() {
continue;
}
let leading_whitespace = chunk
.chars()
.next()
.map(char::is_whitespace)
.unwrap_or(true);
if !trailing_whitespace
&& !leading_whitespace
&& writer_tx.send(vec![b' ']).await.is_err()
{
return Err(UnifiedExecError::WriteToStdin);
}
if writer_tx.send(chunk.as_bytes().to_vec()).await.is_err() {
return Err(UnifiedExecError::WriteToStdin);
}
trailing_whitespace = chunk
.chars()
.next_back()
.map(char::is_whitespace)
.unwrap_or(trailing_whitespace);
}
Ok(())
}
pub async fn handle_request(
&self,
request: UnifiedExecRequest<'_>,
context: UnifiedExecContext<'_>,
) -> Result<UnifiedExecResult, UnifiedExecError> {
let (timeout_ms, timeout_warning) = match request.timeout_ms {
Some(requested) if requested > MAX_TIMEOUT_MS => (
MAX_TIMEOUT_MS,
Some(format!(
"Warning: requested timeout {requested}ms exceeds maximum of {MAX_TIMEOUT_MS}ms; clamping to {MAX_TIMEOUT_MS}ms.\n"
)),
),
Some(requested) => (requested, None),
None => (DEFAULT_TIMEOUT_MS, None),
};
let mut acquisition = self.acquire_session(&request, &context).await?;
if acquisition.reuse_requested {
Self::send_input_chunks(&acquisition.writer_tx, request.input_chunks).await?;
}
let deadline = Instant::now() + Duration::from_millis(timeout_ms);
let collected = Self::collect_output_until_deadline(
&acquisition.output_buffer,
&acquisition.output_notify,
deadline,
)
.await;
let (output, _maybe_tokens) = truncate_middle(
&String::from_utf8_lossy(&collected),
UNIFIED_EXEC_OUTPUT_MAX_BYTES,
);
let output = if let Some(warning) = timeout_warning {
format!("{warning}{output}")
} else {
output
};
let should_store_session = self.should_store_session(&acquisition).await;
let session_id = if should_store_session {
if let Some(session) = acquisition.new_session.take() {
self.sessions
.lock()
.await
.insert(acquisition.session_id, session);
}
Some(acquisition.session_id)
} else {
None
};
Ok(UnifiedExecResult { session_id, output })
}
}

View File

@@ -36,6 +36,7 @@ async fn run_test_cmd(tmp: TempDir, cmd: Vec<&str>) -> Result<ExecToolCallOutput
env: HashMap::new(),
with_escalated_permissions: None,
justification: None,
arg0: None,
};
let policy = SandboxPolicy::new_read_only_policy();

View File

@@ -1,229 +0,0 @@
#![cfg(unix)]
use std::collections::HashMap;
use std::path::PathBuf;
use std::time::Duration;
use async_channel::Receiver;
use codex_core::error::CodexErr;
use codex_core::error::SandboxErr;
use codex_core::exec::ExecParams;
use codex_core::exec::SandboxType;
use codex_core::exec::StdoutStream;
use codex_core::exec::process_exec_tool_call;
use codex_core::protocol::Event;
use codex_core::protocol::EventMsg;
use codex_core::protocol::ExecCommandOutputDeltaEvent;
use codex_core::protocol::ExecOutputStream;
use codex_core::protocol::SandboxPolicy;
fn collect_stdout_events(rx: Receiver<Event>) -> Vec<u8> {
let mut out = Vec::new();
while let Ok(ev) = rx.try_recv() {
if let EventMsg::ExecCommandOutputDelta(ExecCommandOutputDeltaEvent {
stream: ExecOutputStream::Stdout,
chunk,
..
}) = ev.msg
{
out.extend_from_slice(&chunk);
}
}
out
}
#[tokio::test]
async fn test_exec_stdout_stream_events_echo() {
let (tx, rx) = async_channel::unbounded::<Event>();
let stdout_stream = StdoutStream {
sub_id: "test-sub".to_string(),
call_id: "call-1".to_string(),
tx_event: tx,
};
let cmd = vec![
"/bin/sh".to_string(),
"-c".to_string(),
// Use printf for predictable behavior across shells
"printf 'hello-world\n'".to_string(),
];
let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
let params = ExecParams {
command: cmd,
cwd: cwd.clone(),
timeout_ms: Some(5_000),
env: HashMap::new(),
with_escalated_permissions: None,
justification: None,
};
let policy = SandboxPolicy::new_read_only_policy();
let result = process_exec_tool_call(
params,
SandboxType::None,
&policy,
cwd.as_path(),
&None,
Some(stdout_stream),
)
.await;
let result = match result {
Ok(r) => r,
Err(e) => panic!("process_exec_tool_call failed: {e}"),
};
assert_eq!(result.exit_code, 0);
assert_eq!(result.stdout.text, "hello-world\n");
let streamed = collect_stdout_events(rx);
// We should have received at least the same contents (possibly in one chunk)
assert_eq!(String::from_utf8_lossy(&streamed), "hello-world\n");
}
#[tokio::test]
async fn test_exec_stderr_stream_events_echo() {
let (tx, rx) = async_channel::unbounded::<Event>();
let stdout_stream = StdoutStream {
sub_id: "test-sub".to_string(),
call_id: "call-2".to_string(),
tx_event: tx,
};
let cmd = vec![
"/bin/sh".to_string(),
"-c".to_string(),
// Write to stderr explicitly
"printf 'oops\n' 1>&2".to_string(),
];
let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
let params = ExecParams {
command: cmd,
cwd: cwd.clone(),
timeout_ms: Some(5_000),
env: HashMap::new(),
with_escalated_permissions: None,
justification: None,
};
let policy = SandboxPolicy::new_read_only_policy();
let result = process_exec_tool_call(
params,
SandboxType::None,
&policy,
cwd.as_path(),
&None,
Some(stdout_stream),
)
.await;
let result = match result {
Ok(r) => r,
Err(e) => panic!("process_exec_tool_call failed: {e}"),
};
assert_eq!(result.exit_code, 0);
assert_eq!(result.stdout.text, "");
assert_eq!(result.stderr.text, "oops\n");
// Collect only stderr delta events
let mut err = Vec::new();
while let Ok(ev) = rx.try_recv() {
if let EventMsg::ExecCommandOutputDelta(ExecCommandOutputDeltaEvent {
stream: ExecOutputStream::Stderr,
chunk,
..
}) = ev.msg
{
err.extend_from_slice(&chunk);
}
}
assert_eq!(String::from_utf8_lossy(&err), "oops\n");
}
#[tokio::test]
async fn test_aggregated_output_interleaves_in_order() {
// Spawn a shell that alternates stdout and stderr with sleeps to enforce order.
let cmd = vec![
"/bin/sh".to_string(),
"-c".to_string(),
"printf 'O1\\n'; sleep 0.01; printf 'E1\\n' 1>&2; sleep 0.01; printf 'O2\\n'; sleep 0.01; printf 'E2\\n' 1>&2".to_string(),
];
let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
let params = ExecParams {
command: cmd,
cwd: cwd.clone(),
timeout_ms: Some(5_000),
env: HashMap::new(),
with_escalated_permissions: None,
justification: None,
};
let policy = SandboxPolicy::new_read_only_policy();
let result = process_exec_tool_call(
params,
SandboxType::None,
&policy,
cwd.as_path(),
&None,
None,
)
.await
.expect("process_exec_tool_call");
assert_eq!(result.exit_code, 0);
assert_eq!(result.stdout.text, "O1\nO2\n");
assert_eq!(result.stderr.text, "E1\nE2\n");
assert_eq!(result.aggregated_output.text, "O1\nE1\nO2\nE2\n");
assert_eq!(result.aggregated_output.truncated_after_lines, None);
}
#[tokio::test]
async fn test_exec_timeout_returns_partial_output() {
let cmd = vec![
"/bin/sh".to_string(),
"-c".to_string(),
"printf 'before\\n'; sleep 2; printf 'after\\n'".to_string(),
];
let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
let params = ExecParams {
command: cmd,
cwd: cwd.clone(),
timeout_ms: Some(200),
env: HashMap::new(),
with_escalated_permissions: None,
justification: None,
};
let policy = SandboxPolicy::new_read_only_policy();
let result = process_exec_tool_call(
params,
SandboxType::None,
&policy,
cwd.as_path(),
&None,
None,
)
.await;
let Err(CodexErr::Sandbox(SandboxErr::Timeout { output })) = result else {
panic!("expected timeout error");
};
assert_eq!(output.exit_code, 124);
assert_eq!(output.stdout.text, "before\n");
assert!(output.stderr.text.is_empty());
assert_eq!(output.aggregated_output.text, "before\n");
assert!(output.duration >= Duration::from_millis(200));
assert!(output.timed_out);
}

View File

@@ -9,7 +9,6 @@ mod client;
mod compact;
mod compact_resume_fork;
mod exec;
mod exec_stream_events;
mod fork_conversation;
mod grep_files;
mod json_result;

View File

@@ -815,11 +815,7 @@ async fn handle_container_exec_user_approved_records_tool_decision() {
mount_sse(
&server,
sse(vec![
ev_local_shell_call(
"user_approved_call",
"completed",
vec!["/bin/echo", "approved"],
),
ev_local_shell_call("user_approved_call", "completed", vec!["/bin/date"]),
ev_completed("done"),
]),
)
@@ -881,11 +877,7 @@ async fn handle_container_exec_user_approved_for_session_records_tool_decision()
mount_sse(
&server,
sse(vec![
ev_local_shell_call(
"user_approved_session_call",
"completed",
vec!["/bin/echo", "persist"],
),
ev_local_shell_call("user_approved_session_call", "completed", vec!["/bin/date"]),
ev_completed("done"),
]),
)
@@ -947,11 +939,7 @@ async fn handle_sandbox_error_user_approves_retry_records_tool_decision() {
mount_sse(
&server,
sse(vec![
ev_local_shell_call(
"sandbox_retry_call",
"completed",
vec!["/bin/echo", "retry"],
),
ev_local_shell_call("sandbox_retry_call", "completed", vec!["/bin/date"]),
ev_completed("done"),
]),
)
@@ -1013,7 +1001,7 @@ async fn handle_container_exec_user_denies_records_tool_decision() {
mount_sse(
&server,
sse(vec![
ev_local_shell_call("user_denied_call", "completed", vec!["/bin/echo", "deny"]),
ev_local_shell_call("user_denied_call", "completed", vec!["/bin/date"]),
ev_completed("done"),
]),
)
@@ -1075,11 +1063,7 @@ async fn handle_sandbox_error_user_approves_for_session_records_tool_decision()
mount_sse(
&server,
sse(vec![
ev_local_shell_call(
"sandbox_session_call",
"completed",
vec!["/bin/echo", "persist"],
),
ev_local_shell_call("sandbox_session_call", "completed", vec!["/bin/date"]),
ev_completed("done"),
]),
)
@@ -1141,7 +1125,7 @@ async fn handle_sandbox_error_user_denies_records_tool_decision() {
mount_sse(
&server,
sse(vec![
ev_local_shell_call("sandbox_deny_call", "completed", vec!["/bin/echo", "deny"]),
ev_local_shell_call("sandbox_deny_call", "completed", vec!["/bin/date"]),
ev_completed("done"),
]),
)

View File

@@ -8,8 +8,10 @@ use crate::landlock::apply_sandbox_policy_to_current_thread;
pub struct LandlockCommand {
/// It is possible that the cwd used in the context of the sandbox policy
/// is different from the cwd of the process to spawn.
#[arg(long = "sandbox-policy-cwd")]
pub sandbox_policy_cwd: PathBuf,
#[arg(long = "sandbox-policy")]
pub sandbox_policy: codex_core::protocol::SandboxPolicy,
/// Full command args to run under landlock.

View File

@@ -44,6 +44,7 @@ async fn run_cmd(cmd: &[&str], writable_roots: &[PathBuf], timeout_ms: u64) {
env: create_env_from_core_vars(),
with_escalated_permissions: None,
justification: None,
arg0: None,
};
let sandbox_policy = SandboxPolicy::WorkspaceWrite {
@@ -146,6 +147,7 @@ async fn assert_network_blocked(cmd: &[&str]) {
env: create_env_from_core_vars(),
with_escalated_permissions: None,
justification: None,
arg0: None,
};
let sandbox_policy = SandboxPolicy::new_read_only_policy();

View File

@@ -445,7 +445,7 @@ fn create_config_toml(codex_home: &Path, server_uri: &str) -> std::io::Result<()
r#"
model = "mock-model"
approval_policy = "untrusted"
sandbox_policy = "read-only"
sandbox_policy = "workspace-write"
model_provider = "mock_provider"

View File

@@ -0,0 +1,16 @@
[package]
edition = "2021"
name = "codex-utils-pty"
version = { workspace = true }
[lints]
workspace = true
[dependencies]
anyhow = { workspace = true }
portable-pty = { workspace = true }
tokio = { workspace = true, features = [
"macros",
"rt-multi-thread",
"sync",
] }

View File

@@ -0,0 +1,210 @@
use std::collections::HashMap;
use std::io::ErrorKind;
use std::path::Path;
use std::sync::atomic::AtomicBool;
use std::sync::Arc;
use std::sync::Mutex as StdMutex;
use std::time::Duration;
use anyhow::Result;
use portable_pty::native_pty_system;
use portable_pty::CommandBuilder;
use portable_pty::PtySize;
use tokio::sync::broadcast;
use tokio::sync::mpsc;
use tokio::sync::oneshot;
use tokio::sync::Mutex as TokioMutex;
use tokio::task::JoinHandle;
#[derive(Debug)]
pub struct ExecCommandSession {
writer_tx: mpsc::Sender<Vec<u8>>,
output_tx: broadcast::Sender<Vec<u8>>,
killer: StdMutex<Option<Box<dyn portable_pty::ChildKiller + Send + Sync>>>,
reader_handle: StdMutex<Option<JoinHandle<()>>>,
writer_handle: StdMutex<Option<JoinHandle<()>>>,
wait_handle: StdMutex<Option<JoinHandle<()>>>,
exit_status: Arc<AtomicBool>,
exit_code: Arc<StdMutex<Option<i32>>>,
}
impl ExecCommandSession {
#[allow(clippy::too_many_arguments)]
pub fn new(
writer_tx: mpsc::Sender<Vec<u8>>,
output_tx: broadcast::Sender<Vec<u8>>,
killer: Box<dyn portable_pty::ChildKiller + Send + Sync>,
reader_handle: JoinHandle<()>,
writer_handle: JoinHandle<()>,
wait_handle: JoinHandle<()>,
exit_status: Arc<AtomicBool>,
exit_code: Arc<StdMutex<Option<i32>>>,
) -> (Self, broadcast::Receiver<Vec<u8>>) {
let initial_output_rx = output_tx.subscribe();
(
Self {
writer_tx,
output_tx,
killer: StdMutex::new(Some(killer)),
reader_handle: StdMutex::new(Some(reader_handle)),
writer_handle: StdMutex::new(Some(writer_handle)),
wait_handle: StdMutex::new(Some(wait_handle)),
exit_status,
exit_code,
},
initial_output_rx,
)
}
pub fn writer_sender(&self) -> mpsc::Sender<Vec<u8>> {
self.writer_tx.clone()
}
pub fn output_receiver(&self) -> broadcast::Receiver<Vec<u8>> {
self.output_tx.subscribe()
}
pub fn has_exited(&self) -> bool {
self.exit_status.load(std::sync::atomic::Ordering::SeqCst)
}
pub fn exit_code(&self) -> Option<i32> {
self.exit_code.lock().ok().and_then(|guard| *guard)
}
}
impl Drop for ExecCommandSession {
fn drop(&mut self) {
if let Ok(mut killer_opt) = self.killer.lock() {
if let Some(mut killer) = killer_opt.take() {
let _ = killer.kill();
}
}
if let Ok(mut h) = self.reader_handle.lock() {
if let Some(handle) = h.take() {
handle.abort();
}
}
if let Ok(mut h) = self.writer_handle.lock() {
if let Some(handle) = h.take() {
handle.abort();
}
}
if let Ok(mut h) = self.wait_handle.lock() {
if let Some(handle) = h.take() {
handle.abort();
}
}
}
}
#[derive(Debug)]
pub struct SpawnedPty {
pub session: ExecCommandSession,
pub output_rx: broadcast::Receiver<Vec<u8>>,
pub exit_rx: oneshot::Receiver<i32>,
}
pub async fn spawn_pty_process(
program: &str,
args: &[String],
cwd: &Path,
env: &HashMap<String, String>,
) -> Result<SpawnedPty> {
if program.is_empty() {
anyhow::bail!("missing program for PTY spawn");
}
let pty_system = native_pty_system();
let pair = pty_system.openpty(PtySize {
rows: 24,
cols: 80,
pixel_width: 0,
pixel_height: 0,
})?;
let mut command_builder = CommandBuilder::new(program);
command_builder.cwd(cwd);
command_builder.env_clear();
for arg in args {
command_builder.arg(arg);
}
for (key, value) in env {
command_builder.env(key, value);
}
let mut child = pair.slave.spawn_command(command_builder)?;
let killer = child.clone_killer();
let (writer_tx, mut writer_rx) = mpsc::channel::<Vec<u8>>(128);
let (output_tx, _) = broadcast::channel::<Vec<u8>>(256);
let mut reader = pair.master.try_clone_reader()?;
let output_tx_clone = output_tx.clone();
let reader_handle: JoinHandle<()> = tokio::task::spawn_blocking(move || {
let mut buf = [0u8; 8_192];
loop {
match reader.read(&mut buf) {
Ok(0) => break,
Ok(n) => {
let _ = output_tx_clone.send(buf[..n].to_vec());
}
Err(ref e) if e.kind() == ErrorKind::Interrupted => continue,
Err(ref e) if e.kind() == ErrorKind::WouldBlock => {
std::thread::sleep(Duration::from_millis(5));
continue;
}
Err(_) => break,
}
}
});
let writer = pair.master.take_writer()?;
let writer = Arc::new(TokioMutex::new(writer));
let writer_handle: JoinHandle<()> = tokio::spawn({
let writer = Arc::clone(&writer);
async move {
while let Some(bytes) = writer_rx.recv().await {
let mut guard = writer.lock().await;
use std::io::Write;
let _ = guard.write_all(&bytes);
let _ = guard.flush();
}
}
});
let (exit_tx, exit_rx) = oneshot::channel::<i32>();
let exit_status = Arc::new(AtomicBool::new(false));
let wait_exit_status = Arc::clone(&exit_status);
let exit_code = Arc::new(StdMutex::new(None));
let wait_exit_code = Arc::clone(&exit_code);
let wait_handle: JoinHandle<()> = tokio::task::spawn_blocking(move || {
let code = match child.wait() {
Ok(status) => status.exit_code() as i32,
Err(_) => -1,
};
wait_exit_status.store(true, std::sync::atomic::Ordering::SeqCst);
if let Ok(mut guard) = wait_exit_code.lock() {
*guard = Some(code);
}
let _ = exit_tx.send(code);
});
let (session, output_rx) = ExecCommandSession::new(
writer_tx,
output_tx,
killer,
reader_handle,
writer_handle,
wait_handle,
exit_status,
exit_code,
);
Ok(SpawnedPty {
session,
output_rx,
exit_rx,
})
}