chore: rework tools execution workflow (#5278)
Rework the tool execution flow. Read `orchestrator.rs` to understand the new structure.
@@ -11,7 +11,7 @@ use crate::error::Result;
use crate::error::RetryLimitReachedError;
use crate::error::UnexpectedResponseError;
use crate::model_family::ModelFamily;
use crate::openai_tools::create_tools_json_for_chat_completions_api;
use crate::tools::spec::create_tools_json_for_chat_completions_api;
use crate::util::backoff;
use bytes::Bytes;
use codex_otel::otel_event_manager::OtelEventManager;

@@ -45,12 +45,12 @@ use crate::model_family::ModelFamily;
use crate::model_provider_info::ModelProviderInfo;
use crate::model_provider_info::WireApi;
use crate::openai_model_info::get_model_info;
use crate::openai_tools::create_tools_json_for_responses_api;
use crate::protocol::RateLimitSnapshot;
use crate::protocol::RateLimitWindow;
use crate::protocol::TokenUsage;
use crate::state::TaskKind;
use crate::token_data::PlanType;
use crate::tools::spec::create_tools_json_for_responses_api;
use crate::util::backoff;
use chrono::DateTime;
use chrono::Utc;

@@ -281,7 +281,7 @@ pub(crate) struct ResponsesApiRequest<'a> {
}

pub(crate) mod tools {
use crate::openai_tools::JsonSchema;
use crate::tools::spec::JsonSchema;
use serde::Deserialize;
use serde::Serialize;

@@ -8,6 +8,7 @@ use crate::AuthManager;
use crate::client_common::REVIEW_PROMPT;
use crate::event_mapping::map_response_item_to_event_messages;
use crate::function_tool::FunctionCallError;
use crate::parse_command::parse_command;
use crate::review_format::format_review_findings_block;
use crate::terminal;
use crate::user_notification::UserNotifier;
@@ -56,22 +57,14 @@ use crate::conversation_history::ConversationHistory;
use crate::environment_context::EnvironmentContext;
use crate::error::CodexErr;
use crate::error::Result as CodexResult;
use crate::exec::ExecToolCallOutput;
#[cfg(test)]
use crate::exec::StreamOutput;
use crate::exec_command::ExecCommandParams;
use crate::exec_command::ExecSessionManager;
use crate::exec_command::WriteStdinParams;
use crate::executor::Executor;
use crate::executor::ExecutorConfig;
use crate::executor::normalize_exec_result;
// Removed: legacy executor wiring replaced by ToolOrchestrator flows.
// legacy normalize_exec_result no longer used after orchestrator migration
use crate::mcp::auth::compute_auth_statuses;
use crate::mcp_connection_manager::McpConnectionManager;
use crate::model_family::find_family_for_model;
use crate::openai_model_info::get_model_info;
use crate::openai_tools::ToolsConfig;
use crate::openai_tools::ToolsConfigParams;
use crate::parse_command::parse_command;
use crate::project_doc::get_user_instructions;
use crate::protocol::AgentMessageDeltaEvent;
use crate::protocol::AgentReasoningDeltaEvent;
@@ -84,13 +77,9 @@ use crate::protocol::ErrorEvent;
|
||||
use crate::protocol::Event;
|
||||
use crate::protocol::EventMsg;
|
||||
use crate::protocol::ExecApprovalRequestEvent;
|
||||
use crate::protocol::ExecCommandBeginEvent;
|
||||
use crate::protocol::ExecCommandEndEvent;
|
||||
use crate::protocol::InputItem;
|
||||
use crate::protocol::ListCustomPromptsResponseEvent;
|
||||
use crate::protocol::Op;
|
||||
use crate::protocol::PatchApplyBeginEvent;
|
||||
use crate::protocol::PatchApplyEndEvent;
|
||||
use crate::protocol::RateLimitSnapshot;
|
||||
use crate::protocol::ReviewDecision;
|
||||
use crate::protocol::ReviewOutputEvent;
|
||||
@@ -114,8 +103,10 @@ use crate::tasks::RegularTask;
|
||||
use crate::tasks::ReviewTask;
|
||||
use crate::tools::ToolRouter;
|
||||
use crate::tools::context::SharedTurnDiffTracker;
|
||||
use crate::tools::format_exec_output_str;
|
||||
use crate::tools::parallel::ToolCallRuntime;
|
||||
use crate::tools::sandboxing::ApprovalStore;
|
||||
use crate::tools::spec::ToolsConfig;
|
||||
use crate::tools::spec::ToolsConfigParams;
|
||||
use crate::turn_diff_tracker::TurnDiffTracker;
|
||||
use crate::unified_exec::UnifiedExecSessionManager;
|
||||
use crate::user_instructions::UserInstructions;
|
||||
@@ -272,6 +263,7 @@ pub(crate) struct TurnContext {
|
||||
pub(crate) tools_config: ToolsConfig,
|
||||
pub(crate) is_review_mode: bool,
|
||||
pub(crate) final_output_json_schema: Option<Value>,
|
||||
pub(crate) codex_linux_sandbox_exe: Option<PathBuf>,
|
||||
}
|
||||
|
||||
impl TurnContext {
|
||||
@@ -404,6 +396,7 @@ impl Session {
|
||||
tools_config,
|
||||
is_review_mode: false,
|
||||
final_output_json_schema: None,
|
||||
codex_linux_sandbox_exe: config.codex_linux_sandbox_exe.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -561,19 +554,14 @@ impl Session {
|
||||
|
||||
let services = SessionServices {
|
||||
mcp_connection_manager,
|
||||
session_manager: ExecSessionManager::default(),
|
||||
unified_exec_manager: UnifiedExecSessionManager::default(),
|
||||
notifier: UserNotifier::new(config.notify.clone()),
|
||||
rollout: Mutex::new(Some(rollout_recorder)),
|
||||
user_shell: default_shell,
|
||||
show_raw_agent_reasoning: config.show_raw_agent_reasoning,
|
||||
executor: Executor::new(ExecutorConfig::new(
|
||||
session_configuration.sandbox_policy.clone(),
|
||||
session_configuration.cwd.clone(),
|
||||
config.codex_linux_sandbox_exe.clone(),
|
||||
)),
|
||||
auth_manager: Arc::clone(&auth_manager),
|
||||
otel_event_manager,
|
||||
tool_approvals: Mutex::new(ApprovalStore::default()),
|
||||
};
|
||||
|
||||
let sess = Arc::new(Session {
|
||||
@@ -655,13 +643,12 @@ impl Session {
|
||||
}
|
||||
|
||||
pub(crate) async fn new_turn(&self, updates: SessionSettingsUpdate) -> Arc<TurnContext> {
|
||||
let current_configuration = self.state.lock().await.session_configuration.clone();
|
||||
let session_configuration = current_configuration.apply(&updates);
|
||||
|
||||
self.services.executor.update_environment(
|
||||
session_configuration.sandbox_policy.clone(),
|
||||
session_configuration.cwd.clone(),
|
||||
);
|
||||
let session_configuration = {
|
||||
let mut state = self.state.lock().await;
|
||||
let session_configuration = state.session_configuration.clone().apply(&updates);
|
||||
state.session_configuration = session_configuration.clone();
|
||||
session_configuration
|
||||
};
|
||||
|
||||
let mut turn_context: TurnContext = Self::make_turn_context(
|
||||
Some(Arc::clone(&self.services.auth_manager)),
|
||||
@@ -965,168 +952,6 @@ impl Session {
|
||||
}
|
||||
}
|
||||
|
||||
async fn on_exec_command_begin(
|
||||
&self,
|
||||
turn_diff_tracker: SharedTurnDiffTracker,
|
||||
exec_command_context: ExecCommandContext,
|
||||
) {
|
||||
let ExecCommandContext {
|
||||
sub_id,
|
||||
call_id,
|
||||
command_for_display,
|
||||
cwd,
|
||||
apply_patch,
|
||||
..
|
||||
} = exec_command_context;
|
||||
let msg = match apply_patch {
|
||||
Some(ApplyPatchCommandContext {
|
||||
user_explicitly_approved_this_action,
|
||||
changes,
|
||||
}) => {
|
||||
{
|
||||
let mut tracker = turn_diff_tracker.lock().await;
|
||||
tracker.on_patch_begin(&changes);
|
||||
}
|
||||
|
||||
EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
|
||||
call_id,
|
||||
auto_approved: !user_explicitly_approved_this_action,
|
||||
changes,
|
||||
})
|
||||
}
|
||||
None => EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
|
||||
call_id,
|
||||
command: command_for_display.clone(),
|
||||
cwd,
|
||||
parsed_cmd: parse_command(&command_for_display),
|
||||
}),
|
||||
};
|
||||
let event = Event {
|
||||
id: sub_id.to_string(),
|
||||
msg,
|
||||
};
|
||||
self.send_event(event).await;
|
||||
}
|
||||
|
||||
async fn on_exec_command_end(
|
||||
&self,
|
||||
turn_diff_tracker: SharedTurnDiffTracker,
|
||||
sub_id: &str,
|
||||
call_id: &str,
|
||||
output: &ExecToolCallOutput,
|
||||
is_apply_patch: bool,
|
||||
) {
|
||||
let ExecToolCallOutput {
|
||||
stdout,
|
||||
stderr,
|
||||
aggregated_output,
|
||||
duration,
|
||||
exit_code,
|
||||
timed_out: _,
|
||||
..
|
||||
} = output;
|
||||
// Send full stdout/stderr to clients; do not truncate.
|
||||
let stdout = stdout.text.clone();
|
||||
let stderr = stderr.text.clone();
|
||||
let formatted_output = format_exec_output_str(output);
|
||||
let aggregated_output: String = aggregated_output.text.clone();
|
||||
|
||||
let msg = if is_apply_patch {
|
||||
EventMsg::PatchApplyEnd(PatchApplyEndEvent {
|
||||
call_id: call_id.to_string(),
|
||||
stdout,
|
||||
stderr,
|
||||
success: *exit_code == 0,
|
||||
})
|
||||
} else {
|
||||
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
|
||||
call_id: call_id.to_string(),
|
||||
stdout,
|
||||
stderr,
|
||||
aggregated_output,
|
||||
exit_code: *exit_code,
|
||||
duration: *duration,
|
||||
formatted_output,
|
||||
})
|
||||
};
|
||||
|
||||
let event = Event {
|
||||
id: sub_id.to_string(),
|
||||
msg,
|
||||
};
|
||||
self.send_event(event).await;
|
||||
|
||||
// If this is an apply_patch, after we emit the end patch, emit a second event
|
||||
// with the full turn diff if there is one.
|
||||
if is_apply_patch {
|
||||
let unified_diff = {
|
||||
let mut tracker = turn_diff_tracker.lock().await;
|
||||
tracker.get_unified_diff()
|
||||
};
|
||||
if let Ok(Some(unified_diff)) = unified_diff {
|
||||
let msg = EventMsg::TurnDiff(TurnDiffEvent { unified_diff });
|
||||
let event = Event {
|
||||
id: sub_id.into(),
|
||||
msg,
|
||||
};
|
||||
self.send_event(event).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
/// Runs the exec tool call and emits events for the begin and end of the
|
||||
/// command even on error.
|
||||
///
|
||||
/// Returns the output of the exec tool call.
|
||||
pub(crate) async fn run_exec_with_events(
|
||||
&self,
|
||||
turn_diff_tracker: SharedTurnDiffTracker,
|
||||
prepared: PreparedExec,
|
||||
approval_policy: AskForApproval,
|
||||
) -> Result<ExecToolCallOutput, ExecError> {
|
||||
let PreparedExec { context, request } = prepared;
|
||||
let is_apply_patch = context.apply_patch.is_some();
|
||||
let sub_id = context.sub_id.clone();
|
||||
let call_id = context.call_id.clone();
|
||||
|
||||
let begin_turn_diff = turn_diff_tracker.clone();
|
||||
let begin_context = context.clone();
|
||||
let session = self;
|
||||
let result = self
|
||||
.services
|
||||
.executor
|
||||
.run(request, self, approval_policy, &context, move || {
|
||||
let turn_diff = begin_turn_diff.clone();
|
||||
let ctx = begin_context.clone();
|
||||
async move {
|
||||
session.on_exec_command_begin(turn_diff, ctx).await;
|
||||
}
|
||||
})
|
||||
.await;
|
||||
|
||||
if matches!(
|
||||
&result,
|
||||
Err(ExecError::Function(FunctionCallError::Denied(_)))
|
||||
) {
|
||||
return result;
|
||||
}
|
||||
|
||||
let normalized = normalize_exec_result(&result);
|
||||
let borrowed = normalized.event_output();
|
||||
|
||||
self.on_exec_command_end(
|
||||
turn_diff_tracker,
|
||||
&sub_id,
|
||||
&call_id,
|
||||
borrowed,
|
||||
is_apply_patch,
|
||||
)
|
||||
.await;
|
||||
|
||||
drop(normalized);
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// Helper that emits a BackgroundEvent with the given message. This keeps
|
||||
/// the call‑sites terse so adding more diagnostics does not clutter the
|
||||
/// core agent logic.
|
||||
@@ -1235,43 +1060,6 @@ impl Session {
|
||||
.parse_tool_name(tool_name)
|
||||
}
|
||||
|
||||
pub(crate) async fn handle_exec_command_tool(
|
||||
&self,
|
||||
params: ExecCommandParams,
|
||||
) -> Result<String, FunctionCallError> {
|
||||
let result = self
|
||||
.services
|
||||
.session_manager
|
||||
.handle_exec_command_request(params)
|
||||
.await;
|
||||
match result {
|
||||
Ok(output) => Ok(output.to_text_output()),
|
||||
Err(err) => Err(FunctionCallError::RespondToModel(err)),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn handle_write_stdin_tool(
|
||||
&self,
|
||||
params: WriteStdinParams,
|
||||
) -> Result<String, FunctionCallError> {
|
||||
self.services
|
||||
.session_manager
|
||||
.handle_write_stdin_request(params)
|
||||
.await
|
||||
.map(|output| output.to_text_output())
|
||||
.map_err(FunctionCallError::RespondToModel)
|
||||
}
|
||||
|
||||
pub(crate) async fn run_unified_exec_request(
|
||||
&self,
|
||||
request: crate::unified_exec::UnifiedExecRequest<'_>,
|
||||
) -> Result<crate::unified_exec::UnifiedExecResult, crate::unified_exec::UnifiedExecError> {
|
||||
self.services
|
||||
.unified_exec_manager
|
||||
.handle_request(request)
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn interrupt_task(self: &Arc<Self>) {
|
||||
info!("interrupt received: abort current task, if any");
|
||||
self.abort_all_tasks(TurnAbortReason::Interrupted).await;
|
||||
@@ -1633,6 +1421,7 @@ async fn spawn_review_thread(
|
||||
cwd: parent_turn_context.cwd.clone(),
|
||||
is_review_mode: true,
|
||||
final_output_json_schema: None,
|
||||
codex_linux_sandbox_exe: parent_turn_context.codex_linux_sandbox_exe.clone(),
|
||||
};
|
||||
|
||||
// Seed the child task with the review prompt as the initial user message.
|
||||
@@ -2511,10 +2300,6 @@ fn is_mcp_client_auth_required_error(error: &anyhow::Error) -> bool {
|
||||
error.to_string().contains("Auth required")
|
||||
}
|
||||
|
||||
use crate::executor::errors::ExecError;
|
||||
use crate::executor::linkers::PreparedExec;
|
||||
use crate::tools::context::ApplyPatchCommandContext;
|
||||
use crate::tools::context::ExecCommandContext;
|
||||
#[cfg(test)]
|
||||
pub(crate) use tests::make_session_and_context;
|
||||
|
||||
@@ -2523,6 +2308,8 @@ mod tests {
|
||||
use super::*;
|
||||
use crate::config::ConfigOverrides;
|
||||
use crate::config::ConfigToml;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::tools::format_exec_output_str;
|
||||
|
||||
use crate::protocol::CompactedItem;
|
||||
use crate::protocol::InitialHistory;
|
||||
@@ -2827,19 +2614,14 @@ mod tests {
|
||||
|
||||
let services = SessionServices {
|
||||
mcp_connection_manager: McpConnectionManager::default(),
|
||||
session_manager: ExecSessionManager::default(),
|
||||
unified_exec_manager: UnifiedExecSessionManager::default(),
|
||||
notifier: UserNotifier::new(None),
|
||||
rollout: Mutex::new(None),
|
||||
user_shell: shell::Shell::Unknown,
|
||||
show_raw_agent_reasoning: config.show_raw_agent_reasoning,
|
||||
executor: Executor::new(ExecutorConfig::new(
|
||||
session_configuration.sandbox_policy.clone(),
|
||||
session_configuration.cwd.clone(),
|
||||
None,
|
||||
)),
|
||||
auth_manager: Arc::clone(&auth_manager),
|
||||
otel_event_manager: otel_event_manager.clone(),
|
||||
tool_approvals: Mutex::new(ApprovalStore::default()),
|
||||
};
|
||||
|
||||
let session = Session {
|
||||
@@ -2898,19 +2680,14 @@ mod tests {
|
||||
|
||||
let services = SessionServices {
|
||||
mcp_connection_manager: McpConnectionManager::default(),
|
||||
session_manager: ExecSessionManager::default(),
|
||||
unified_exec_manager: UnifiedExecSessionManager::default(),
|
||||
notifier: UserNotifier::new(None),
|
||||
rollout: Mutex::new(None),
|
||||
user_shell: shell::Shell::Unknown,
|
||||
show_raw_agent_reasoning: config.show_raw_agent_reasoning,
|
||||
executor: Executor::new(ExecutorConfig::new(
|
||||
session_configuration.sandbox_policy.clone(),
|
||||
session_configuration.cwd.clone(),
|
||||
None,
|
||||
)),
|
||||
auth_manager: Arc::clone(&auth_manager),
|
||||
otel_event_manager: otel_event_manager.clone(),
|
||||
tool_approvals: Mutex::new(ApprovalStore::default()),
|
||||
};
|
||||
|
||||
let session = Arc::new(Session {
|
||||
@@ -3258,6 +3035,7 @@ mod tests {
|
||||
env: HashMap::new(),
|
||||
with_escalated_permissions: Some(true),
|
||||
justification: Some("test".to_string()),
|
||||
arg0: None,
|
||||
};
|
||||
|
||||
let params2 = ExecParams {
|
||||
|
||||
@@ -1,15 +1,25 @@
use crate::bash::parse_bash_lc_plain_commands;

pub fn is_known_safe_command(command: &[String]) -> bool {
let command: Vec<String> = command
.iter()
.map(|s| {
if s == "zsh" {
"bash".to_string()
} else {
s.clone()
}
})
.collect();
#[cfg(target_os = "windows")]
{
use super::windows_safe_commands::is_safe_command_windows;
if is_safe_command_windows(command) {
if is_safe_command_windows(&command) {
return true;
}
}

if is_safe_to_call_with_exec(command) {
if is_safe_to_call_with_exec(&command) {
return true;
}

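Net effect of the normalization above: `zsh` is rewritten to `bash` before any check runs, so a `zsh -lc` script gets the same allow-list treatment as `bash -lc`. A hypothetical test-style sketch that could sit next to this function (the command strings are invented; `cat` is taken from the allow-list shown further down in this file):

```rust
#[cfg(test)]
mod zsh_normalization_sketch {
    use super::is_known_safe_command;

    #[test]
    fn zsh_lc_is_treated_like_bash_lc() {
        // Rewritten to `bash -lc "cat Cargo.toml"` before the safety checks, so
        // the plain `cat` command inside the script is what actually gets vetted.
        let zsh = ["zsh", "-lc", "cat Cargo.toml"].map(String::from);
        assert!(is_known_safe_command(&zsh));

        // A bare allow-listed binary is accepted by `is_safe_to_call_with_exec` directly.
        let cat = ["cat", "Cargo.toml"].map(String::from);
        assert!(is_known_safe_command(&cat));
    }
}
```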
@@ -19,7 +29,7 @@ pub fn is_known_safe_command(command: &[String]) -> bool {
// introduce side effects ( "&&", "||", ";", and "|" ). If every
// individual command in the script is itself a known‑safe command, then
// the composite expression is considered safe.
if let Some(all_commands) = parse_bash_lc_plain_commands(command)
if let Some(all_commands) = parse_bash_lc_plain_commands(&command)
&& !all_commands.is_empty()
&& all_commands
.iter()
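As the comment above says, a `bash -lc` script is only considered safe when every plain command joined by `&&`, `||`, `;`, or `|` is itself known-safe. A hypothetical continuation of the sketch above, assuming `cargo build` is not accepted by any arm of the elided allow-list (only `cargo check` is shown as allowed below):

```rust
#[test]
fn every_command_in_the_script_must_be_allow_listed() {
    // `cat` and `cargo check` are both accepted, so the chained script passes.
    let chained = ["bash", "-lc", "cat Cargo.toml && cargo check"].map(String::from);
    assert!(is_known_safe_command(&chained));

    // `cargo build` fails the `cargo check` guard, which poisons the whole script.
    let tainted = ["bash", "-lc", "cat Cargo.toml && cargo build"].map(String::from);
    assert!(!is_known_safe_command(&tainted));
}
```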
@@ -31,9 +41,14 @@ pub fn is_known_safe_command(command: &[String]) -> bool {
}

fn is_safe_to_call_with_exec(command: &[String]) -> bool {
let cmd0 = command.first().map(String::as_str);
let Some(cmd0) = command.first().map(String::as_str) else {
return false;
};

match cmd0 {
match std::path::Path::new(&cmd0)
.file_name()
.and_then(|osstr| osstr.to_str())
{
#[rustfmt::skip]
Some(
"cat" |
@@ -103,13 +118,12 @@ fn is_safe_to_call_with_exec(command: &[String]) -> bool {
// Rust
Some("cargo") if command.get(1).map(String::as_str) == Some("check") => true,

// Special-case `sed -n {N|M,N}p FILE`
// Special-case `sed -n {N|M,N}p`
Some("sed")
if {
command.len() == 4
command.len() <= 4
&& command.get(1).map(String::as_str) == Some("-n")
&& is_valid_sed_n_arg(command.get(2).map(String::as_str))
&& command.get(3).map(String::is_empty) == Some(false)
} =>
{
true

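Judging by the hunk counts here (13 lines before, 12 after), the `command.get(3)` non-empty check is the line dropped on the new side, so together with `command.len() <= 4` the trailing file argument becomes optional: `sed -n 5p` reading stdin now passes the shape check just like `sed -n 1,20p FILE`. A hypothetical sketch, assuming `is_valid_sed_n_arg` accepts the usual `N` / `M,N` forms with a trailing `p`:

```rust
#[test]
fn sed_n_print_no_longer_requires_a_file_argument() {
    // Four elements: accepted both before and after this change.
    let with_file = ["sed", "-n", "1,20p", "Cargo.toml"].map(String::from);
    assert!(is_known_safe_command(&with_file));

    // Three elements (stdin): newly accepted thanks to `command.len() <= 4`.
    let stdin_only = ["sed", "-n", "5p"].map(String::from);
    assert!(is_known_safe_command(&stdin_only));

    // In-place editing still fails the `-n` + range-shape guard.
    let in_place = ["sed", "-i", "s/foo/bar/", "Cargo.toml"].map(String::from);
    assert!(!is_known_safe_command(&in_place));
}
```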
@@ -18,13 +18,14 @@ use tokio::process::Child;
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::Result;
|
||||
use crate::error::SandboxErr;
|
||||
use crate::landlock::spawn_command_under_linux_sandbox;
|
||||
use crate::protocol::Event;
|
||||
use crate::protocol::EventMsg;
|
||||
use crate::protocol::ExecCommandOutputDeltaEvent;
|
||||
use crate::protocol::ExecOutputStream;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use crate::seatbelt::spawn_command_under_seatbelt;
|
||||
use crate::sandboxing::CommandSpec;
|
||||
use crate::sandboxing::ExecEnv;
|
||||
use crate::sandboxing::SandboxManager;
|
||||
use crate::spawn::StdioPolicy;
|
||||
use crate::spawn::spawn_child_async;
|
||||
|
||||
@@ -53,6 +54,7 @@ pub struct ExecParams {
|
||||
pub env: HashMap<String, String>,
|
||||
pub with_escalated_permissions: Option<bool>,
|
||||
pub justification: Option<String>,
|
||||
pub arg0: Option<String>,
|
||||
}
|
||||
|
||||
impl ExecParams {
|
||||
@@ -87,57 +89,85 @@ pub async fn process_exec_tool_call(
|
||||
codex_linux_sandbox_exe: &Option<PathBuf>,
|
||||
stdout_stream: Option<StdoutStream>,
|
||||
) -> Result<ExecToolCallOutput> {
|
||||
let start = Instant::now();
|
||||
let ExecParams {
|
||||
command,
|
||||
cwd,
|
||||
timeout_ms,
|
||||
env,
|
||||
with_escalated_permissions,
|
||||
justification,
|
||||
arg0: _,
|
||||
} = params;
|
||||
|
||||
let timeout_duration = params.timeout_duration();
|
||||
let (program, args) = command.split_first().ok_or_else(|| {
|
||||
CodexErr::Io(io::Error::new(
|
||||
io::ErrorKind::InvalidInput,
|
||||
"command args are empty",
|
||||
))
|
||||
})?;
|
||||
|
||||
let raw_output_result: std::result::Result<RawExecToolCallOutput, CodexErr> = match sandbox_type
|
||||
{
|
||||
SandboxType::None => exec(params, sandbox_policy, stdout_stream.clone()).await,
|
||||
SandboxType::MacosSeatbelt => {
|
||||
let ExecParams {
|
||||
command,
|
||||
cwd: command_cwd,
|
||||
env,
|
||||
..
|
||||
} = params;
|
||||
let child = spawn_command_under_seatbelt(
|
||||
command,
|
||||
command_cwd,
|
||||
sandbox_policy,
|
||||
sandbox_cwd,
|
||||
StdioPolicy::RedirectForShellTool,
|
||||
env,
|
||||
)
|
||||
.await?;
|
||||
consume_truncated_output(child, timeout_duration, stdout_stream.clone()).await
|
||||
}
|
||||
SandboxType::LinuxSeccomp => {
|
||||
let ExecParams {
|
||||
command,
|
||||
cwd: command_cwd,
|
||||
env,
|
||||
..
|
||||
} = params;
|
||||
|
||||
let codex_linux_sandbox_exe = codex_linux_sandbox_exe
|
||||
.as_ref()
|
||||
.ok_or(CodexErr::LandlockSandboxExecutableNotProvided)?;
|
||||
let child = spawn_command_under_linux_sandbox(
|
||||
codex_linux_sandbox_exe,
|
||||
command,
|
||||
command_cwd,
|
||||
sandbox_policy,
|
||||
sandbox_cwd,
|
||||
StdioPolicy::RedirectForShellTool,
|
||||
env,
|
||||
)
|
||||
.await?;
|
||||
|
||||
consume_truncated_output(child, timeout_duration, stdout_stream).await
|
||||
}
|
||||
let spec = CommandSpec {
program: program.clone(),
args: args.to_vec(),
cwd,
env,
timeout_ms,
with_escalated_permissions,
justification,
};

let manager = SandboxManager::new();
let exec_env = manager
.transform(
&spec,
sandbox_policy,
sandbox_type,
sandbox_cwd,
codex_linux_sandbox_exe.as_ref(),
)
.map_err(CodexErr::from)?;

// Route through the sandboxing module for a single, unified execution path.
crate::sandboxing::execute_env(&exec_env, sandbox_policy, stdout_stream).await
}

pub(crate) async fn execute_exec_env(
|
||||
env: ExecEnv,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
stdout_stream: Option<StdoutStream>,
|
||||
) -> Result<ExecToolCallOutput> {
|
||||
let ExecEnv {
|
||||
command,
|
||||
cwd,
|
||||
env,
|
||||
timeout_ms,
|
||||
sandbox,
|
||||
with_escalated_permissions,
|
||||
justification,
|
||||
arg0,
|
||||
} = env;
|
||||
|
||||
let params = ExecParams {
|
||||
command,
|
||||
cwd,
|
||||
timeout_ms,
|
||||
env,
|
||||
with_escalated_permissions,
|
||||
justification,
|
||||
arg0,
|
||||
};
|
||||
|
||||
let start = Instant::now();
|
||||
let raw_output_result = exec(params, sandbox_policy, stdout_stream).await;
|
||||
let duration = start.elapsed();
|
||||
finalize_exec_result(raw_output_result, sandbox, duration)
|
||||
}
|
||||
|
||||
fn finalize_exec_result(
|
||||
raw_output_result: std::result::Result<RawExecToolCallOutput, CodexErr>,
|
||||
sandbox_type: SandboxType,
|
||||
duration: Duration,
|
||||
) -> Result<ExecToolCallOutput> {
|
||||
match raw_output_result {
|
||||
Ok(raw_output) => {
|
||||
#[allow(unused_mut)]
|
||||
@@ -192,12 +222,30 @@ pub async fn process_exec_tool_call(
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) mod errors {
|
||||
use super::CodexErr;
|
||||
use crate::sandboxing::SandboxTransformError;
|
||||
|
||||
impl From<SandboxTransformError> for CodexErr {
|
||||
fn from(err: SandboxTransformError) -> Self {
|
||||
match err {
|
||||
SandboxTransformError::MissingLinuxSandboxExecutable => {
|
||||
CodexErr::LandlockSandboxExecutableNotProvided
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// We don't have a fully deterministic way to tell if our command failed
/// because of the sandbox - a command in the user's zshrc file might hit an
/// error, but the command itself might fail or succeed for other reasons.
/// For now, we conservatively check for well known command failure exit codes and
/// also look for common sandbox denial keywords in the command output.
fn is_likely_sandbox_denied(sandbox_type: SandboxType, exec_output: &ExecToolCallOutput) -> bool {
pub(crate) fn is_likely_sandbox_denied(
sandbox_type: SandboxType,
exec_output: &ExecToolCallOutput,
) -> bool {
if sandbox_type == SandboxType::None || exec_output.exit_code == 0 {
return false;
}
@@ -206,21 +254,17 @@ fn is_likely_sandbox_denied(sandbox_type: SandboxType, exec_output: &ExecToolCal
// 2: misuse of shell builtins
// 126: permission denied
// 127: command not found
const QUICK_REJECT_EXIT_CODES: [i32; 3] = [2, 126, 127];
if QUICK_REJECT_EXIT_CODES.contains(&exec_output.exit_code) {
return false;
}

const SANDBOX_DENIED_KEYWORDS: [&str; 6] = [
const SANDBOX_DENIED_KEYWORDS: [&str; 7] = [
"operation not permitted",
"permission denied",
"read-only file system",
"seccomp",
"sandbox",
"landlock",
"failed to write file",
];

if [
let has_sandbox_keyword = [
&exec_output.stderr.text,
&exec_output.stdout.text,
&exec_output.aggregated_output.text,
@@ -231,10 +275,17 @@ fn is_likely_sandbox_denied(sandbox_type: SandboxType, exec_output: &ExecToolCal
SANDBOX_DENIED_KEYWORDS
.iter()
.any(|needle| lower.contains(needle))
}) {
});

if has_sandbox_keyword {
return true;
}

const QUICK_REJECT_EXIT_CODES: [i32; 3] = [2, 126, 127];
if QUICK_REJECT_EXIT_CODES.contains(&exec_output.exit_code) {
return false;
}

#[cfg(unix)]
{
const SIGSYS_CODE: i32 = libc::SIGSYS;
@@ -248,11 +299,12 @@ fn is_likely_sandbox_denied(sandbox_type: SandboxType, exec_output: &ExecToolCal
false
}
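Net effect of the reordering above: a sandbox keyword anywhere in stdout/stderr/aggregated output now wins over the quick-reject exit codes, so a `permission denied` message with exit code 126 is classified as a sandbox denial instead of being rejected early. A minimal standalone restatement of the new decision order, not the function itself, which takes a `SandboxType` and an `ExecToolCallOutput` and (judging by the `SIGSYS` constant) also checks for seccomp kills on Unix:

```rust
/// Illustrative restatement of the reworked ordering, for reading alongside the hunk above.
fn looks_like_sandbox_denial(exit_code: i32, combined_output: &str) -> bool {
    const SANDBOX_DENIED_KEYWORDS: [&str; 7] = [
        "operation not permitted",
        "permission denied",
        "read-only file system",
        "seccomp",
        "sandbox",
        "landlock",
        "failed to write file",
    ];
    // 1. Keyword match anywhere in the output => likely denied, regardless of exit code.
    let lower = combined_output.to_lowercase();
    if SANDBOX_DENIED_KEYWORDS.iter().any(|needle| lower.contains(needle)) {
        return true;
    }
    // 2. Otherwise, well-known shell failure codes are not blamed on the sandbox.
    const QUICK_REJECT_EXIT_CODES: [i32; 3] = [2, 126, 127];
    if QUICK_REJECT_EXIT_CODES.contains(&exit_code) {
        return false;
    }
    // 3. Remaining non-zero exits fall through to the platform-specific checks.
    false
}

fn main() {
    // Exit 126 used to be quick-rejected before the keyword scan; now the message decides.
    assert!(looks_like_sandbox_denial(126, "bash: /usr/bin/touch: Permission denied"));
    assert!(!looks_like_sandbox_denial(127, "command not found: foo"));
}
```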
#[derive(Debug)]
|
||||
pub struct StreamOutput<T> {
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct StreamOutput<T: Clone> {
|
||||
pub text: T,
|
||||
pub truncated_after_lines: Option<u32>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct RawExecToolCallOutput {
|
||||
pub exit_status: ExitStatus,
|
||||
@@ -285,7 +337,7 @@ fn append_all(dst: &mut Vec<u8>, src: &[u8]) {
|
||||
dst.extend_from_slice(src);
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct ExecToolCallOutput {
|
||||
pub exit_code: i32,
|
||||
pub stdout: StreamOutput<String>,
|
||||
@@ -302,7 +354,11 @@ async fn exec(
|
||||
) -> Result<RawExecToolCallOutput> {
|
||||
let timeout = params.timeout_duration();
|
||||
let ExecParams {
|
||||
command, cwd, env, ..
|
||||
command,
|
||||
cwd,
|
||||
env,
|
||||
arg0,
|
||||
..
|
||||
} = params;
|
||||
|
||||
let (program, args) = command.split_first().ok_or_else(|| {
|
||||
@@ -311,11 +367,11 @@ async fn exec(
|
||||
"command args are empty",
|
||||
))
|
||||
})?;
|
||||
let arg0 = None;
|
||||
let arg0_ref = arg0.as_deref();
|
||||
let child = spawn_child_async(
|
||||
PathBuf::from(program),
|
||||
args.into(),
|
||||
arg0,
|
||||
arg0_ref,
|
||||
cwd,
|
||||
sandbox_policy,
|
||||
StdioPolicy::RedirectForShellTool,
|
||||
|
||||
@@ -1,57 +0,0 @@
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::exec_command::session_id::SessionId;
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct ExecCommandParams {
|
||||
pub(crate) cmd: String,
|
||||
|
||||
#[serde(default = "default_yield_time")]
|
||||
pub(crate) yield_time_ms: u64,
|
||||
|
||||
#[serde(default = "max_output_tokens")]
|
||||
pub(crate) max_output_tokens: u64,
|
||||
|
||||
#[serde(default = "default_shell")]
|
||||
pub(crate) shell: String,
|
||||
|
||||
#[serde(default = "default_login")]
|
||||
pub(crate) login: bool,
|
||||
}
|
||||
|
||||
fn default_yield_time() -> u64 {
|
||||
10_000
|
||||
}
|
||||
|
||||
fn max_output_tokens() -> u64 {
|
||||
10_000
|
||||
}
|
||||
|
||||
fn default_login() -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn default_shell() -> String {
|
||||
"/bin/bash".to_string()
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
pub struct WriteStdinParams {
|
||||
pub(crate) session_id: SessionId,
|
||||
pub(crate) chars: String,
|
||||
|
||||
#[serde(default = "write_stdin_default_yield_time_ms")]
|
||||
pub(crate) yield_time_ms: u64,
|
||||
|
||||
#[serde(default = "write_stdin_default_max_output_tokens")]
|
||||
pub(crate) max_output_tokens: u64,
|
||||
}
|
||||
|
||||
fn write_stdin_default_yield_time_ms() -> u64 {
|
||||
250
|
||||
}
|
||||
|
||||
fn write_stdin_default_max_output_tokens() -> u64 {
|
||||
10_000
|
||||
}
|
||||
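`exec_command_params.rs` is deleted at this path in the rework, but the defaulting contract it encoded is worth spelling out: only `cmd` is required, and serde fills in a 10-second yield window, a 10,000-token output cap, `/bin/bash`, and a login shell. A standalone sketch that re-declares the struct locally purely for illustration:

```rust
use serde::Deserialize;

// Local re-declaration of the removed struct's defaulting behavior, for illustration only.
#[derive(Debug, Deserialize)]
struct ExecCommandParamsSketch {
    cmd: String,
    #[serde(default = "default_yield_time")]
    yield_time_ms: u64,
    #[serde(default = "max_output_tokens")]
    max_output_tokens: u64,
    #[serde(default = "default_shell")]
    shell: String,
    #[serde(default = "default_login")]
    login: bool,
}

fn default_yield_time() -> u64 { 10_000 }
fn max_output_tokens() -> u64 { 10_000 }
fn default_shell() -> String { "/bin/bash".to_string() }
fn default_login() -> bool { true }

fn main() {
    // A tool call that supplies only `cmd` picks up the defaults above.
    let params: ExecCommandParamsSketch =
        serde_json::from_str(r#"{ "cmd": "ls -la" }"#).expect("valid params");
    assert_eq!(params.yield_time_ms, 10_000);
    assert_eq!(params.max_output_tokens, 10_000);
    assert_eq!(params.shell, "/bin/bash");
    assert!(params.login);
}
```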
@@ -1,96 +0,0 @@
|
||||
use std::sync::Mutex as StdMutex;
|
||||
|
||||
use tokio::sync::broadcast;
|
||||
use tokio::sync::mpsc;
|
||||
use tokio::task::JoinHandle;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct ExecCommandSession {
|
||||
/// Queue for writing bytes to the process stdin (PTY master write side).
|
||||
writer_tx: mpsc::Sender<Vec<u8>>,
|
||||
/// Broadcast stream of output chunks read from the PTY. New subscribers
|
||||
/// receive only chunks emitted after they subscribe.
|
||||
output_tx: broadcast::Sender<Vec<u8>>,
|
||||
|
||||
/// Child killer handle for termination on drop (can signal independently
|
||||
/// of a thread blocked in `.wait()`).
|
||||
killer: StdMutex<Option<Box<dyn portable_pty::ChildKiller + Send + Sync>>>,
|
||||
|
||||
/// JoinHandle for the blocking PTY reader task.
|
||||
reader_handle: StdMutex<Option<JoinHandle<()>>>,
|
||||
|
||||
/// JoinHandle for the stdin writer task.
|
||||
writer_handle: StdMutex<Option<JoinHandle<()>>>,
|
||||
|
||||
/// JoinHandle for the child wait task.
|
||||
wait_handle: StdMutex<Option<JoinHandle<()>>>,
|
||||
|
||||
/// Tracks whether the underlying process has exited.
|
||||
exit_status: std::sync::Arc<std::sync::atomic::AtomicBool>,
|
||||
}
|
||||
|
||||
impl ExecCommandSession {
|
||||
pub(crate) fn new(
|
||||
writer_tx: mpsc::Sender<Vec<u8>>,
|
||||
output_tx: broadcast::Sender<Vec<u8>>,
|
||||
killer: Box<dyn portable_pty::ChildKiller + Send + Sync>,
|
||||
reader_handle: JoinHandle<()>,
|
||||
writer_handle: JoinHandle<()>,
|
||||
wait_handle: JoinHandle<()>,
|
||||
exit_status: std::sync::Arc<std::sync::atomic::AtomicBool>,
|
||||
) -> (Self, broadcast::Receiver<Vec<u8>>) {
|
||||
let initial_output_rx = output_tx.subscribe();
|
||||
(
|
||||
Self {
|
||||
writer_tx,
|
||||
output_tx,
|
||||
killer: StdMutex::new(Some(killer)),
|
||||
reader_handle: StdMutex::new(Some(reader_handle)),
|
||||
writer_handle: StdMutex::new(Some(writer_handle)),
|
||||
wait_handle: StdMutex::new(Some(wait_handle)),
|
||||
exit_status,
|
||||
},
|
||||
initial_output_rx,
|
||||
)
|
||||
}
|
||||
|
||||
pub(crate) fn writer_sender(&self) -> mpsc::Sender<Vec<u8>> {
|
||||
self.writer_tx.clone()
|
||||
}
|
||||
|
||||
pub(crate) fn output_receiver(&self) -> broadcast::Receiver<Vec<u8>> {
|
||||
self.output_tx.subscribe()
|
||||
}
|
||||
|
||||
pub(crate) fn has_exited(&self) -> bool {
|
||||
self.exit_status.load(std::sync::atomic::Ordering::SeqCst)
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for ExecCommandSession {
|
||||
fn drop(&mut self) {
|
||||
// Best-effort: terminate child first so blocking tasks can complete.
|
||||
if let Ok(mut killer_opt) = self.killer.lock()
|
||||
&& let Some(mut killer) = killer_opt.take()
|
||||
{
|
||||
let _ = killer.kill();
|
||||
}
|
||||
|
||||
// Abort background tasks; they may already have exited after kill.
|
||||
if let Ok(mut h) = self.reader_handle.lock()
|
||||
&& let Some(handle) = h.take()
|
||||
{
|
||||
handle.abort();
|
||||
}
|
||||
if let Ok(mut h) = self.writer_handle.lock()
|
||||
&& let Some(handle) = h.take()
|
||||
{
|
||||
handle.abort();
|
||||
}
|
||||
if let Ok(mut h) = self.wait_handle.lock()
|
||||
&& let Some(handle) = h.take()
|
||||
{
|
||||
handle.abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
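The doc comment on `output_tx` in the (removed) session struct above is the property the whole design leans on: a `tokio::sync::broadcast` subscriber only sees chunks sent after it subscribes, which is what lets `write_stdin` return output "from now" instead of replaying the backlog. A minimal standalone illustration of that semantics, independent of the session plumbing:

```rust
use tokio::sync::broadcast;

#[tokio::main]
async fn main() {
    let (tx, mut early_rx) = broadcast::channel::<Vec<u8>>(16);

    tx.send(b"before".to_vec()).unwrap();

    // Subscribing after the first send: this receiver never sees "before".
    let mut late_rx = tx.subscribe();
    tx.send(b"after".to_vec()).unwrap();

    assert_eq!(early_rx.recv().await.unwrap(), b"before".to_vec());
    assert_eq!(early_rx.recv().await.unwrap(), b"after".to_vec());
    assert_eq!(late_rx.recv().await.unwrap(), b"after".to_vec());
}
```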
@@ -1,14 +0,0 @@
|
||||
mod exec_command_params;
|
||||
mod exec_command_session;
|
||||
mod responses_api;
|
||||
mod session_id;
|
||||
mod session_manager;
|
||||
|
||||
pub use exec_command_params::ExecCommandParams;
|
||||
pub use exec_command_params::WriteStdinParams;
|
||||
pub(crate) use exec_command_session::ExecCommandSession;
|
||||
pub use responses_api::EXEC_COMMAND_TOOL_NAME;
|
||||
pub use responses_api::WRITE_STDIN_TOOL_NAME;
|
||||
pub use responses_api::create_exec_command_tool_for_responses_api;
|
||||
pub use responses_api::create_write_stdin_tool_for_responses_api;
|
||||
pub use session_manager::SessionManager as ExecSessionManager;
|
||||
@@ -1,98 +0,0 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use crate::client_common::tools::ResponsesApiTool;
|
||||
use crate::openai_tools::JsonSchema;
|
||||
|
||||
pub const EXEC_COMMAND_TOOL_NAME: &str = "exec_command";
|
||||
pub const WRITE_STDIN_TOOL_NAME: &str = "write_stdin";
|
||||
|
||||
pub fn create_exec_command_tool_for_responses_api() -> ResponsesApiTool {
|
||||
let mut properties = BTreeMap::<String, JsonSchema>::new();
|
||||
properties.insert(
|
||||
"cmd".to_string(),
|
||||
JsonSchema::String {
|
||||
description: Some("The shell command to execute.".to_string()),
|
||||
},
|
||||
);
|
||||
properties.insert(
|
||||
"yield_time_ms".to_string(),
|
||||
JsonSchema::Number {
|
||||
description: Some("The maximum time in milliseconds to wait for output.".to_string()),
|
||||
},
|
||||
);
|
||||
properties.insert(
|
||||
"max_output_tokens".to_string(),
|
||||
JsonSchema::Number {
|
||||
description: Some("The maximum number of tokens to output.".to_string()),
|
||||
},
|
||||
);
|
||||
properties.insert(
|
||||
"shell".to_string(),
|
||||
JsonSchema::String {
|
||||
description: Some("The shell to use. Defaults to \"/bin/bash\".".to_string()),
|
||||
},
|
||||
);
|
||||
properties.insert(
|
||||
"login".to_string(),
|
||||
JsonSchema::Boolean {
|
||||
description: Some(
|
||||
"Whether to run the command as a login shell. Defaults to true.".to_string(),
|
||||
),
|
||||
},
|
||||
);
|
||||
|
||||
ResponsesApiTool {
|
||||
name: EXEC_COMMAND_TOOL_NAME.to_owned(),
|
||||
description: r#"Execute shell commands on the local machine with streaming output."#
|
||||
.to_string(),
|
||||
strict: false,
|
||||
parameters: JsonSchema::Object {
|
||||
properties,
|
||||
required: Some(vec!["cmd".to_string()]),
|
||||
additional_properties: Some(false.into()),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn create_write_stdin_tool_for_responses_api() -> ResponsesApiTool {
|
||||
let mut properties = BTreeMap::<String, JsonSchema>::new();
|
||||
properties.insert(
|
||||
"session_id".to_string(),
|
||||
JsonSchema::Number {
|
||||
description: Some("The ID of the exec_command session.".to_string()),
|
||||
},
|
||||
);
|
||||
properties.insert(
|
||||
"chars".to_string(),
|
||||
JsonSchema::String {
|
||||
description: Some("The characters to write to stdin.".to_string()),
|
||||
},
|
||||
);
|
||||
properties.insert(
|
||||
"yield_time_ms".to_string(),
|
||||
JsonSchema::Number {
|
||||
description: Some(
|
||||
"The maximum time in milliseconds to wait for output after writing.".to_string(),
|
||||
),
|
||||
},
|
||||
);
|
||||
properties.insert(
|
||||
"max_output_tokens".to_string(),
|
||||
JsonSchema::Number {
|
||||
description: Some("The maximum number of tokens to output.".to_string()),
|
||||
},
|
||||
);
|
||||
|
||||
ResponsesApiTool {
|
||||
name: WRITE_STDIN_TOOL_NAME.to_owned(),
|
||||
description: r#"Write characters to an exec session's stdin. Returns all stdout+stderr received within yield_time_ms.
|
||||
Can write control characters (\u0003 for Ctrl-C), or an empty string to just poll stdout+stderr."#
|
||||
.to_string(),
|
||||
strict: false,
|
||||
parameters: JsonSchema::Object {
|
||||
properties,
|
||||
required: Some(vec!["session_id".to_string(), "chars".to_string()]),
|
||||
additional_properties: Some(false.into()),
|
||||
},
|
||||
}
|
||||
}
|
||||
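The `write_stdin` description above spells out the two call shapes: send control characters (e.g. `\u0003` for Ctrl-C), or an empty `chars` string to merely poll buffered stdout+stderr. A hypothetical pair of argument payloads, built with `serde_json` for illustration (the session id and timing values are invented):

```rust
use serde_json::json;

fn main() {
    // Interrupt whatever is running in session 3 and collect ~250ms of output.
    let interrupt = json!({
        "session_id": 3,
        "chars": "\u{0003}",   // Ctrl-C
        "yield_time_ms": 250
    });

    // Empty `chars`: write nothing, just poll buffered stdout+stderr.
    let poll = json!({
        "session_id": 3,
        "chars": "",
        "yield_time_ms": 1000,
        "max_output_tokens": 2000
    });

    println!("{interrupt}\n{poll}");
}
```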
@@ -1,5 +0,0 @@
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
pub(crate) struct SessionId(pub u32);
|
||||
@@ -1,506 +0,0 @@
|
||||
use std::collections::HashMap;
|
||||
use std::io::ErrorKind;
|
||||
use std::io::Read;
|
||||
use std::sync::Arc;
|
||||
use std::sync::Mutex as StdMutex;
|
||||
use std::sync::atomic::AtomicBool;
|
||||
use std::sync::atomic::AtomicU32;
|
||||
|
||||
use portable_pty::CommandBuilder;
|
||||
use portable_pty::PtySize;
|
||||
use portable_pty::native_pty_system;
|
||||
use tokio::sync::Mutex;
|
||||
use tokio::sync::mpsc;
|
||||
use tokio::sync::oneshot;
|
||||
use tokio::time::Duration;
|
||||
use tokio::time::Instant;
|
||||
use tokio::time::timeout;
|
||||
|
||||
use crate::exec_command::exec_command_params::ExecCommandParams;
|
||||
use crate::exec_command::exec_command_params::WriteStdinParams;
|
||||
use crate::exec_command::exec_command_session::ExecCommandSession;
|
||||
use crate::exec_command::session_id::SessionId;
|
||||
use crate::truncate::truncate_middle;
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct SessionManager {
|
||||
next_session_id: AtomicU32,
|
||||
sessions: Mutex<HashMap<SessionId, ExecCommandSession>>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct ExecCommandOutput {
|
||||
wall_time: Duration,
|
||||
exit_status: ExitStatus,
|
||||
original_token_count: Option<u64>,
|
||||
output: String,
|
||||
}
|
||||
|
||||
impl ExecCommandOutput {
|
||||
pub(crate) fn to_text_output(&self) -> String {
|
||||
let wall_time_secs = self.wall_time.as_secs_f32();
|
||||
let termination_status = match self.exit_status {
|
||||
ExitStatus::Exited(code) => format!("Process exited with code {code}"),
|
||||
ExitStatus::Ongoing(session_id) => {
|
||||
format!("Process running with session ID {}", session_id.0)
|
||||
}
|
||||
};
|
||||
let truncation_status = match self.original_token_count {
|
||||
Some(tokens) => {
|
||||
format!("\nWarning: truncated output (original token count: {tokens})")
|
||||
}
|
||||
None => "".to_string(),
|
||||
};
|
||||
format!(
|
||||
r#"Wall time: {wall_time_secs:.3} seconds
|
||||
{termination_status}{truncation_status}
|
||||
Output:
|
||||
{output}"#,
|
||||
output = self.output
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum ExitStatus {
|
||||
Exited(i32),
|
||||
Ongoing(SessionId),
|
||||
}
|
||||
|
||||
impl SessionManager {
|
||||
/// Processes the request and is required to send a response via `outgoing`.
|
||||
pub async fn handle_exec_command_request(
|
||||
&self,
|
||||
params: ExecCommandParams,
|
||||
) -> Result<ExecCommandOutput, String> {
|
||||
// Allocate a session id.
|
||||
let session_id = SessionId(
|
||||
self.next_session_id
|
||||
.fetch_add(1, std::sync::atomic::Ordering::SeqCst),
|
||||
);
|
||||
|
||||
let (session, mut output_rx, mut exit_rx) = create_exec_command_session(params.clone())
|
||||
.await
|
||||
.map_err(|err| {
|
||||
format!(
|
||||
"failed to create exec command session for session id {}: {err}",
|
||||
session_id.0
|
||||
)
|
||||
})?;
|
||||
|
||||
// Insert into session map.
|
||||
self.sessions.lock().await.insert(session_id, session);
|
||||
|
||||
// Collect output until either timeout expires or process exits.
|
||||
// Do not cap during collection; truncate at the end if needed.
|
||||
// Use a modest initial capacity to avoid large preallocation.
|
||||
let cap_bytes_u64 = params.max_output_tokens.saturating_mul(4);
|
||||
let cap_bytes: usize = cap_bytes_u64.min(usize::MAX as u64) as usize;
|
||||
let mut collected: Vec<u8> = Vec::with_capacity(4096);
|
||||
|
||||
let start_time = Instant::now();
|
||||
let deadline = start_time + Duration::from_millis(params.yield_time_ms);
|
||||
let mut exit_code: Option<i32> = None;
|
||||
|
||||
loop {
|
||||
if Instant::now() >= deadline {
|
||||
break;
|
||||
}
|
||||
let remaining = deadline.saturating_duration_since(Instant::now());
|
||||
tokio::select! {
|
||||
biased;
|
||||
exit = &mut exit_rx => {
|
||||
exit_code = exit.ok();
|
||||
// Small grace period to pull remaining buffered output
|
||||
let grace_deadline = Instant::now() + Duration::from_millis(25);
|
||||
while Instant::now() < grace_deadline {
|
||||
match timeout(Duration::from_millis(1), output_rx.recv()).await {
|
||||
Ok(Ok(chunk)) => {
|
||||
collected.extend_from_slice(&chunk);
|
||||
}
|
||||
Ok(Err(tokio::sync::broadcast::error::RecvError::Lagged(_))) => {
|
||||
// Skip missed messages; keep trying within grace period.
|
||||
continue;
|
||||
}
|
||||
Ok(Err(tokio::sync::broadcast::error::RecvError::Closed)) => break,
|
||||
Err(_) => break,
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
chunk = timeout(remaining, output_rx.recv()) => {
|
||||
match chunk {
|
||||
Ok(Ok(chunk)) => {
|
||||
collected.extend_from_slice(&chunk);
|
||||
}
|
||||
Ok(Err(tokio::sync::broadcast::error::RecvError::Lagged(_))) => {
|
||||
// Skip missed messages; continue collecting fresh output.
|
||||
}
|
||||
Ok(Err(tokio::sync::broadcast::error::RecvError::Closed)) => { break; }
|
||||
Err(_) => { break; }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let output = String::from_utf8_lossy(&collected).to_string();
|
||||
|
||||
let exit_status = if let Some(code) = exit_code {
|
||||
ExitStatus::Exited(code)
|
||||
} else {
|
||||
ExitStatus::Ongoing(session_id)
|
||||
};
|
||||
|
||||
// If output exceeds cap, truncate the middle and record original token estimate.
|
||||
let (output, original_token_count) = truncate_middle(&output, cap_bytes);
|
||||
Ok(ExecCommandOutput {
|
||||
wall_time: Instant::now().duration_since(start_time),
|
||||
exit_status,
|
||||
original_token_count,
|
||||
output,
|
||||
})
|
||||
}
|
||||
|
||||
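Both collection paths cap output at `max_output_tokens * 4` bytes via `truncate_middle`, i.e. a rough 4-bytes-per-token estimate applied after collection rather than while reading. Worked numbers: the default budget of 10,000 tokens allows about 40,000 bytes, and the 16-token cap used by the truncation test below allows 64 bytes. A tiny sketch of the same arithmetic:

```rust
fn cap_bytes(max_output_tokens: u64) -> usize {
    // Mirrors the session manager: ~4 bytes per token, saturating, clamped to usize.
    max_output_tokens.saturating_mul(4).min(usize::MAX as u64) as usize
}

fn main() {
    assert_eq!(cap_bytes(10_000), 40_000); // default exec_command budget
    assert_eq!(cap_bytes(16), 64);         // the small cap used by the truncation test
    assert_eq!(cap_bytes(u64::MAX), usize::MAX); // saturates instead of overflowing
}
```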
/// Write characters to a session's stdin and collect combined output for up to `yield_time_ms`.
|
||||
pub async fn handle_write_stdin_request(
|
||||
&self,
|
||||
params: WriteStdinParams,
|
||||
) -> Result<ExecCommandOutput, String> {
|
||||
let WriteStdinParams {
|
||||
session_id,
|
||||
chars,
|
||||
yield_time_ms,
|
||||
max_output_tokens,
|
||||
} = params;
|
||||
|
||||
// Grab handles without holding the sessions lock across await points.
|
||||
let (writer_tx, mut output_rx) = {
|
||||
let sessions = self.sessions.lock().await;
|
||||
match sessions.get(&session_id) {
|
||||
Some(session) => (session.writer_sender(), session.output_receiver()),
|
||||
None => {
|
||||
return Err(format!("unknown session id {}", session_id.0));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Write stdin if provided.
|
||||
if !chars.is_empty() && writer_tx.send(chars.into_bytes()).await.is_err() {
|
||||
return Err("failed to write to stdin".to_string());
|
||||
}
|
||||
|
||||
// Collect output up to yield_time_ms, truncating to max_output_tokens bytes.
|
||||
let mut collected: Vec<u8> = Vec::with_capacity(4096);
|
||||
let start_time = Instant::now();
|
||||
let deadline = start_time + Duration::from_millis(yield_time_ms);
|
||||
loop {
|
||||
let now = Instant::now();
|
||||
if now >= deadline {
|
||||
break;
|
||||
}
|
||||
let remaining = deadline - now;
|
||||
match timeout(remaining, output_rx.recv()).await {
|
||||
Ok(Ok(chunk)) => {
|
||||
// Collect all output within the time budget; truncate at the end.
|
||||
collected.extend_from_slice(&chunk);
|
||||
}
|
||||
Ok(Err(tokio::sync::broadcast::error::RecvError::Lagged(_))) => {
|
||||
// Skip missed messages; continue collecting fresh output.
|
||||
}
|
||||
Ok(Err(tokio::sync::broadcast::error::RecvError::Closed)) => break,
|
||||
Err(_) => break, // timeout
|
||||
}
|
||||
}
|
||||
|
||||
// Return structured output, truncating middle if over cap.
|
||||
let output = String::from_utf8_lossy(&collected).to_string();
|
||||
let cap_bytes_u64 = max_output_tokens.saturating_mul(4);
|
||||
let cap_bytes: usize = cap_bytes_u64.min(usize::MAX as u64) as usize;
|
||||
let (output, original_token_count) = truncate_middle(&output, cap_bytes);
|
||||
Ok(ExecCommandOutput {
|
||||
wall_time: Instant::now().duration_since(start_time),
|
||||
exit_status: ExitStatus::Ongoing(session_id),
|
||||
original_token_count,
|
||||
output,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Spawn PTY and child process per spawn_exec_command_session logic.
|
||||
async fn create_exec_command_session(
|
||||
params: ExecCommandParams,
|
||||
) -> anyhow::Result<(
|
||||
ExecCommandSession,
|
||||
tokio::sync::broadcast::Receiver<Vec<u8>>,
|
||||
oneshot::Receiver<i32>,
|
||||
)> {
|
||||
let ExecCommandParams {
|
||||
cmd,
|
||||
yield_time_ms: _,
|
||||
max_output_tokens: _,
|
||||
shell,
|
||||
login,
|
||||
} = params;
|
||||
|
||||
// Use the native pty implementation for the system
|
||||
let pty_system = native_pty_system();
|
||||
|
||||
// Create a new pty
|
||||
let pair = pty_system.openpty(PtySize {
|
||||
rows: 24,
|
||||
cols: 80,
|
||||
pixel_width: 0,
|
||||
pixel_height: 0,
|
||||
})?;
|
||||
|
||||
// Spawn a shell into the pty
|
||||
let mut command_builder = CommandBuilder::new(shell);
|
||||
let shell_mode_opt = if login { "-lc" } else { "-c" };
|
||||
command_builder.arg(shell_mode_opt);
|
||||
command_builder.arg(cmd);
|
||||
|
||||
let mut child = pair.slave.spawn_command(command_builder)?;
|
||||
// Obtain a killer that can signal the process independently of `.wait()`.
|
||||
let killer = child.clone_killer();
|
||||
|
||||
// Channel to forward write requests to the PTY writer.
|
||||
let (writer_tx, mut writer_rx) = mpsc::channel::<Vec<u8>>(128);
|
||||
// Broadcast for streaming PTY output to readers: subscribers receive from subscription time.
|
||||
let (output_tx, _) = tokio::sync::broadcast::channel::<Vec<u8>>(256);
|
||||
// Reader task: drain PTY and forward chunks to output channel.
|
||||
let mut reader = pair.master.try_clone_reader()?;
|
||||
let output_tx_clone = output_tx.clone();
|
||||
let reader_handle = tokio::task::spawn_blocking(move || {
|
||||
let mut buf = [0u8; 8192];
|
||||
loop {
|
||||
match reader.read(&mut buf) {
|
||||
Ok(0) => break, // EOF
|
||||
Ok(n) => {
|
||||
// Forward to broadcast; best-effort if there are subscribers.
|
||||
let _ = output_tx_clone.send(buf[..n].to_vec());
|
||||
}
|
||||
Err(ref e) if e.kind() == ErrorKind::Interrupted => {
|
||||
// Retry on EINTR
|
||||
continue;
|
||||
}
|
||||
Err(ref e) if e.kind() == ErrorKind::WouldBlock => {
|
||||
// We're in a blocking thread; back off briefly and retry.
|
||||
std::thread::sleep(Duration::from_millis(5));
|
||||
continue;
|
||||
}
|
||||
Err(_) => break,
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Writer task: apply stdin writes to the PTY writer.
|
||||
let writer = pair.master.take_writer()?;
|
||||
let writer = Arc::new(StdMutex::new(writer));
|
||||
let writer_handle = tokio::spawn({
|
||||
let writer = writer.clone();
|
||||
async move {
|
||||
while let Some(bytes) = writer_rx.recv().await {
|
||||
let writer = writer.clone();
|
||||
// Perform blocking write on a blocking thread.
|
||||
let _ = tokio::task::spawn_blocking(move || {
|
||||
if let Ok(mut guard) = writer.lock() {
|
||||
use std::io::Write;
|
||||
let _ = guard.write_all(&bytes);
|
||||
let _ = guard.flush();
|
||||
}
|
||||
})
|
||||
.await;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Keep the child alive until it exits, then signal exit code.
|
||||
let (exit_tx, exit_rx) = oneshot::channel::<i32>();
|
||||
let exit_status = Arc::new(AtomicBool::new(false));
|
||||
let wait_exit_status = exit_status.clone();
|
||||
let wait_handle = tokio::task::spawn_blocking(move || {
|
||||
let code = match child.wait() {
|
||||
Ok(status) => status.exit_code() as i32,
|
||||
Err(_) => -1,
|
||||
};
|
||||
wait_exit_status.store(true, std::sync::atomic::Ordering::SeqCst);
|
||||
let _ = exit_tx.send(code);
|
||||
});
|
||||
|
||||
// Create and store the session with channels.
|
||||
let (session, initial_output_rx) = ExecCommandSession::new(
|
||||
writer_tx,
|
||||
output_tx,
|
||||
killer,
|
||||
reader_handle,
|
||||
writer_handle,
|
||||
wait_handle,
|
||||
exit_status,
|
||||
);
|
||||
Ok((session, initial_output_rx, exit_rx))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::exec_command::session_id::SessionId;
|
||||
|
||||
/// Test that verifies that [`SessionManager::handle_exec_command_request()`]
|
||||
/// and [`SessionManager::handle_write_stdin_request()`] work as expected
|
||||
/// in the presence of a process that never terminates (but produces
|
||||
/// output continuously).
|
||||
#[cfg(unix)]
|
||||
#[allow(clippy::print_stderr)]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
|
||||
async fn session_manager_streams_and_truncates_from_now() {
|
||||
use crate::exec_command::exec_command_params::ExecCommandParams;
|
||||
use crate::exec_command::exec_command_params::WriteStdinParams;
|
||||
use tokio::time::sleep;
|
||||
|
||||
let session_manager = SessionManager::default();
|
||||
// Long-running loop that prints an increasing counter every ~100ms.
|
||||
// Use Python for a portable, reliable sleep across shells/PTYs.
|
||||
let cmd = r#"python3 - <<'PY'
|
||||
import sys, time
|
||||
count = 0
|
||||
while True:
|
||||
print(count)
|
||||
sys.stdout.flush()
|
||||
count += 100
|
||||
time.sleep(0.1)
|
||||
PY"#
|
||||
.to_string();
|
||||
|
||||
// Start the session and collect ~3s of output.
|
||||
let params = ExecCommandParams {
|
||||
cmd,
|
||||
yield_time_ms: 3_000,
|
||||
max_output_tokens: 1_000, // large enough to avoid truncation here
|
||||
shell: "/bin/bash".to_string(),
|
||||
login: false,
|
||||
};
|
||||
let initial_output = match session_manager
|
||||
.handle_exec_command_request(params.clone())
|
||||
.await
|
||||
{
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
// PTY may be restricted in some sandboxes; skip in that case.
|
||||
if e.contains("openpty") || e.contains("Operation not permitted") {
|
||||
eprintln!("skipping test due to restricted PTY: {e}");
|
||||
return;
|
||||
}
|
||||
panic!("exec request failed unexpectedly: {e}");
|
||||
}
|
||||
};
|
||||
eprintln!("initial output: {initial_output:?}");
|
||||
|
||||
// Should be ongoing (we launched a never-ending loop).
|
||||
let session_id = match initial_output.exit_status {
|
||||
ExitStatus::Ongoing(id) => id,
|
||||
_ => panic!("expected ongoing session"),
|
||||
};
|
||||
|
||||
// Parse the numeric lines and get the max observed value in the first window.
|
||||
let first_nums = extract_monotonic_numbers(&initial_output.output);
|
||||
assert!(
|
||||
!first_nums.is_empty(),
|
||||
"expected some output from first window"
|
||||
);
|
||||
let first_max = *first_nums.iter().max().unwrap();
|
||||
|
||||
// Wait ~4s so counters progress while we're not reading.
|
||||
sleep(Duration::from_millis(4_000)).await;
|
||||
|
||||
// Now read ~3s of output "from now" only.
|
||||
// Use a small token cap so truncation occurs and we test middle truncation.
|
||||
let write_params = WriteStdinParams {
|
||||
session_id,
|
||||
chars: String::new(),
|
||||
yield_time_ms: 3_000,
|
||||
max_output_tokens: 16, // 16 tokens ~= 64 bytes -> likely truncation
|
||||
};
|
||||
let second = session_manager
|
||||
.handle_write_stdin_request(write_params)
|
||||
.await
|
||||
.expect("write stdin should succeed");
|
||||
|
||||
// Verify truncation metadata and size bound (cap is tokens*4 bytes).
|
||||
assert!(second.original_token_count.is_some());
|
||||
let cap_bytes = (16u64 * 4) as usize;
|
||||
assert!(second.output.len() <= cap_bytes);
|
||||
// New middle marker should be present.
|
||||
assert!(
|
||||
second.output.contains("tokens truncated") && second.output.contains('…'),
|
||||
"expected truncation marker in output, got: {}",
|
||||
second.output
|
||||
);
|
||||
|
||||
// Minimal freshness check: the earliest number we see in the second window
|
||||
// should be significantly larger than the last from the first window.
|
||||
let second_nums = extract_monotonic_numbers(&second.output);
|
||||
assert!(
|
||||
!second_nums.is_empty(),
|
||||
"expected some numeric output from second window"
|
||||
);
|
||||
let second_min = *second_nums.iter().min().unwrap();
|
||||
|
||||
// We slept 4 seconds (~40 ticks at 100ms/tick, each +100), so expect
|
||||
// an increase of roughly 4000 or more. Allow a generous margin.
|
||||
assert!(
|
||||
second_min >= first_max + 2000,
|
||||
"second_min={second_min} first_max={first_max}",
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
fn extract_monotonic_numbers(s: &str) -> Vec<i64> {
|
||||
s.lines()
|
||||
.filter_map(|line| {
|
||||
if !line.is_empty()
|
||||
&& line.chars().all(|c| c.is_ascii_digit())
|
||||
&& let Ok(n) = line.parse::<i64>()
|
||||
{
|
||||
// Our generator increments by 100; ignore spurious fragments.
|
||||
if n % 100 == 0 {
|
||||
return Some(n);
|
||||
}
|
||||
}
|
||||
None
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn to_text_output_exited_no_truncation() {
|
||||
let out = ExecCommandOutput {
|
||||
wall_time: Duration::from_millis(1234),
|
||||
exit_status: ExitStatus::Exited(0),
|
||||
original_token_count: None,
|
||||
output: "hello".to_string(),
|
||||
};
|
||||
let text = out.to_text_output();
|
||||
let expected = r#"Wall time: 1.234 seconds
|
||||
Process exited with code 0
|
||||
Output:
|
||||
hello"#;
|
||||
assert_eq!(expected, text);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn to_text_output_ongoing_with_truncation() {
|
||||
let out = ExecCommandOutput {
|
||||
wall_time: Duration::from_millis(500),
|
||||
exit_status: ExitStatus::Ongoing(SessionId(42)),
|
||||
original_token_count: Some(1000),
|
||||
output: "abc".to_string(),
|
||||
};
|
||||
let text = out.to_text_output();
|
||||
let expected = r#"Wall time: 0.500 seconds
|
||||
Process running with session ID 42
|
||||
Warning: truncated output (original token count: 1000)
|
||||
Output:
|
||||
abc"#;
|
||||
assert_eq!(expected, text);
|
||||
}
|
||||
}
|
||||
@@ -1,109 +0,0 @@
|
||||
use std::collections::HashMap;
|
||||
use std::env;
|
||||
|
||||
use async_trait::async_trait;
|
||||
|
||||
use crate::CODEX_APPLY_PATCH_ARG1;
|
||||
use crate::apply_patch::ApplyPatchExec;
|
||||
use crate::exec::ExecParams;
|
||||
use crate::executor::ExecutorConfig;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
|
||||
pub(crate) enum ExecutionMode {
|
||||
Shell,
|
||||
ApplyPatch(ApplyPatchExec),
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
/// Backend-specific hooks that prepare and post-process execution requests for a
|
||||
/// given [`ExecutionMode`].
|
||||
pub(crate) trait ExecutionBackend: Send + Sync {
|
||||
fn prepare(
|
||||
&self,
|
||||
params: ExecParams,
|
||||
// Required for downcasting the apply_patch.
|
||||
mode: &ExecutionMode,
|
||||
config: &ExecutorConfig,
|
||||
) -> Result<ExecParams, FunctionCallError>;
|
||||
|
||||
fn stream_stdout(&self, _mode: &ExecutionMode) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
static SHELL_BACKEND: ShellBackend = ShellBackend;
|
||||
static APPLY_PATCH_BACKEND: ApplyPatchBackend = ApplyPatchBackend;
|
||||
|
||||
pub(crate) fn backend_for_mode(mode: &ExecutionMode) -> &'static dyn ExecutionBackend {
|
||||
match mode {
|
||||
ExecutionMode::Shell => &SHELL_BACKEND,
|
||||
ExecutionMode::ApplyPatch(_) => &APPLY_PATCH_BACKEND,
|
||||
}
|
||||
}
|
||||
|
||||
struct ShellBackend;
|
||||
|
||||
#[async_trait]
|
||||
impl ExecutionBackend for ShellBackend {
|
||||
fn prepare(
|
||||
&self,
|
||||
params: ExecParams,
|
||||
mode: &ExecutionMode,
|
||||
_config: &ExecutorConfig,
|
||||
) -> Result<ExecParams, FunctionCallError> {
|
||||
match mode {
|
||||
ExecutionMode::Shell => Ok(params),
|
||||
_ => Err(FunctionCallError::RespondToModel(
|
||||
"shell backend invoked with non-shell mode".to_string(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct ApplyPatchBackend;
|
||||
|
||||
#[async_trait]
|
||||
impl ExecutionBackend for ApplyPatchBackend {
|
||||
fn prepare(
|
||||
&self,
|
||||
params: ExecParams,
|
||||
mode: &ExecutionMode,
|
||||
config: &ExecutorConfig,
|
||||
) -> Result<ExecParams, FunctionCallError> {
|
||||
match mode {
|
||||
ExecutionMode::ApplyPatch(exec) => {
|
||||
let path_to_codex = if let Some(exe_path) = &config.codex_exe {
|
||||
exe_path.to_string_lossy().to_string()
|
||||
} else {
|
||||
env::current_exe()
|
||||
.ok()
|
||||
.map(|p| p.to_string_lossy().to_string())
|
||||
.ok_or_else(|| {
|
||||
FunctionCallError::RespondToModel(
|
||||
"failed to determine path to codex executable".to_string(),
|
||||
)
|
||||
})?
|
||||
};
|
||||
|
||||
let patch = exec.action.patch.clone();
|
||||
Ok(ExecParams {
|
||||
command: vec![path_to_codex, CODEX_APPLY_PATCH_ARG1.to_string(), patch],
|
||||
cwd: exec.action.cwd.clone(),
|
||||
timeout_ms: params.timeout_ms,
|
||||
// Run apply_patch with a minimal environment for determinism and to
|
||||
// avoid leaking host environment variables into the patch process.
|
||||
env: HashMap::new(),
|
||||
with_escalated_permissions: params.with_escalated_permissions,
|
||||
justification: params.justification,
|
||||
})
|
||||
}
|
||||
ExecutionMode::Shell => Err(FunctionCallError::RespondToModel(
|
||||
"apply_patch backend invoked without patch context".to_string(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
fn stream_stdout(&self, _mode: &ExecutionMode) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
||||
@@ -1,51 +0,0 @@
|
||||
use std::collections::HashSet;
|
||||
use std::sync::Arc;
|
||||
use std::sync::Mutex;
|
||||
|
||||
#[derive(Clone, Debug, Default)]
|
||||
/// Thread-safe store of user approvals so repeated commands can reuse
|
||||
/// previously granted trust.
|
||||
pub(crate) struct ApprovalCache {
|
||||
inner: Arc<Mutex<HashSet<Vec<String>>>>,
|
||||
}
|
||||
|
||||
impl ApprovalCache {
|
||||
pub(crate) fn insert(&self, command: Vec<String>) {
|
||||
if command.is_empty() {
|
||||
return;
|
||||
}
|
||||
if let Ok(mut guard) = self.inner.lock() {
|
||||
guard.insert(command);
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn snapshot(&self) -> HashSet<Vec<String>> {
|
||||
self.inner.lock().map(|g| g.clone()).unwrap_or_default()
|
||||
}
|
||||
}
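A quick usage sketch of the insert/snapshot contract, with the type re-declared in simplified form so the snippet stands alone (the real cache is crate-private and lives inside the executor):

use std::collections::HashSet;
use std::sync::{Arc, Mutex};

// Simplified stand-in mirroring the ApprovalCache shown above.
#[derive(Clone, Default)]
struct ApprovalCache {
    inner: Arc<Mutex<HashSet<Vec<String>>>>,
}

impl ApprovalCache {
    fn insert(&self, command: Vec<String>) {
        if command.is_empty() {
            return;
        }
        if let Ok(mut guard) = self.inner.lock() {
            guard.insert(command);
        }
    }

    fn snapshot(&self) -> HashSet<Vec<String>> {
        self.inner.lock().map(|g| g.clone()).unwrap_or_default()
    }
}

fn main() {
    let cache = ApprovalCache::default();
    let shared = cache.clone(); // clones share the same underlying set via Arc

    let cmd = vec!["git".to_string(), "status".to_string()];
    cache.insert(cmd.clone());

    // A later, identical command can consult the snapshot and skip re-prompting.
    assert!(shared.snapshot().contains(&cmd));
}

Because clones share the same `Arc`, any task holding a clone sees approvals recorded by the others.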
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
#[test]
|
||||
fn insert_ignores_empty_and_dedupes() {
|
||||
let cache = ApprovalCache::default();
|
||||
|
||||
// Empty should be ignored
|
||||
cache.insert(vec![]);
|
||||
assert!(cache.snapshot().is_empty());
|
||||
|
||||
// Insert a command and verify snapshot contains it
|
||||
let cmd = vec!["foo".to_string(), "bar".to_string()];
|
||||
cache.insert(cmd.clone());
|
||||
let snap1 = cache.snapshot();
|
||||
assert!(snap1.contains(&cmd));
|
||||
|
||||
// Reinserting should not create duplicates
|
||||
cache.insert(cmd);
|
||||
let snap2 = cache.snapshot();
|
||||
assert_eq!(snap1, snap2);
|
||||
}
|
||||
}
|
||||
@@ -1,68 +0,0 @@
|
||||
mod backends;
|
||||
mod cache;
|
||||
mod runner;
|
||||
mod sandbox;
|
||||
|
||||
pub(crate) use backends::ExecutionMode;
|
||||
pub(crate) use runner::ExecutionRequest;
|
||||
pub(crate) use runner::Executor;
|
||||
pub(crate) use runner::ExecutorConfig;
|
||||
pub(crate) use runner::normalize_exec_result;
|
||||
|
||||
pub(crate) mod linkers {
|
||||
use crate::exec::ExecParams;
|
||||
use crate::exec::StdoutStream;
|
||||
use crate::executor::backends::ExecutionMode;
|
||||
use crate::executor::runner::ExecutionRequest;
|
||||
use crate::tools::context::ExecCommandContext;
|
||||
|
||||
pub struct PreparedExec {
|
||||
pub(crate) context: ExecCommandContext,
|
||||
pub(crate) request: ExecutionRequest,
|
||||
}
|
||||
|
||||
impl PreparedExec {
|
||||
pub fn new(
|
||||
context: ExecCommandContext,
|
||||
params: ExecParams,
|
||||
approval_command: Vec<String>,
|
||||
mode: ExecutionMode,
|
||||
stdout_stream: Option<StdoutStream>,
|
||||
use_shell_profile: bool,
|
||||
) -> Self {
|
||||
let request = ExecutionRequest {
|
||||
params,
|
||||
approval_command,
|
||||
mode,
|
||||
stdout_stream,
|
||||
use_shell_profile,
|
||||
};
|
||||
|
||||
Self { context, request }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub mod errors {
|
||||
use crate::error::CodexErr;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use thiserror::Error;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum ExecError {
|
||||
#[error(transparent)]
|
||||
Function(#[from] FunctionCallError),
|
||||
#[error(transparent)]
|
||||
Codex(#[from] CodexErr),
|
||||
}
|
||||
|
||||
impl ExecError {
|
||||
pub(crate) fn rejection(msg: impl Into<String>) -> Self {
|
||||
FunctionCallError::RespondToModel(msg.into()).into()
|
||||
}
|
||||
|
||||
pub(crate) fn denied(msg: impl Into<String>) -> Self {
|
||||
FunctionCallError::Denied(msg.into()).into()
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,433 +0,0 @@
|
||||
use std::future::Future;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::sync::RwLock;
|
||||
use std::time::Duration;
|
||||
|
||||
use super::backends::ExecutionMode;
|
||||
use super::backends::backend_for_mode;
|
||||
use super::cache::ApprovalCache;
|
||||
use crate::codex::Session;
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::SandboxErr;
|
||||
use crate::error::get_error_message_ui;
|
||||
use crate::exec::ExecParams;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::exec::SandboxType;
|
||||
use crate::exec::StdoutStream;
|
||||
use crate::exec::StreamOutput;
|
||||
use crate::exec::process_exec_tool_call;
|
||||
use crate::executor::errors::ExecError;
|
||||
use crate::executor::sandbox::select_sandbox;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::protocol::AskForApproval;
|
||||
use crate::protocol::ReviewDecision;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use crate::shell;
|
||||
use crate::tools::context::ExecCommandContext;
|
||||
use codex_otel::otel_event_manager::ToolDecisionSource;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub(crate) struct ExecutorConfig {
|
||||
pub(crate) sandbox_policy: SandboxPolicy,
|
||||
pub(crate) sandbox_cwd: PathBuf,
|
||||
pub(crate) codex_exe: Option<PathBuf>,
|
||||
}
|
||||
|
||||
impl ExecutorConfig {
|
||||
pub(crate) fn new(
|
||||
sandbox_policy: SandboxPolicy,
|
||||
sandbox_cwd: PathBuf,
|
||||
codex_exe: Option<PathBuf>,
|
||||
) -> Self {
|
||||
Self {
|
||||
sandbox_policy,
|
||||
sandbox_cwd,
|
||||
codex_exe,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Coordinates sandbox selection, backend-specific preparation, and command
|
||||
/// execution for tool calls requested by the model.
|
||||
pub(crate) struct Executor {
|
||||
approval_cache: ApprovalCache,
|
||||
config: Arc<RwLock<ExecutorConfig>>,
|
||||
}
|
||||
|
||||
impl Executor {
|
||||
pub(crate) fn new(config: ExecutorConfig) -> Self {
|
||||
Self {
|
||||
approval_cache: ApprovalCache::default(),
|
||||
config: Arc::new(RwLock::new(config)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Updates the sandbox policy and working directory used for future
|
||||
/// executions without recreating the executor.
|
||||
pub(crate) fn update_environment(&self, sandbox_policy: SandboxPolicy, sandbox_cwd: PathBuf) {
|
||||
if let Ok(mut cfg) = self.config.write() {
|
||||
cfg.sandbox_policy = sandbox_policy;
|
||||
cfg.sandbox_cwd = sandbox_cwd;
|
||||
}
|
||||
}
|
||||
|
||||
/// Runs a prepared execution request end-to-end: prepares parameters, decides on
|
||||
/// sandbox placement (prompting the user when necessary), launches the command,
|
||||
/// and lets the backend post-process the final output.
|
||||
pub(crate) async fn run<F, Fut>(
|
||||
&self,
|
||||
mut request: ExecutionRequest,
|
||||
session: &Session,
|
||||
approval_policy: AskForApproval,
|
||||
context: &ExecCommandContext,
|
||||
on_exec_begin: F,
|
||||
) -> Result<ExecToolCallOutput, ExecError>
|
||||
where
|
||||
F: FnOnce() -> Fut,
|
||||
Fut: Future<Output = ()>,
|
||||
{
|
||||
if matches!(request.mode, ExecutionMode::Shell) {
|
||||
request.params =
|
||||
maybe_translate_shell_command(request.params, session, request.use_shell_profile);
|
||||
}
|
||||
|
||||
// Step 1: Snapshot sandbox configuration so it stays stable for this run.
|
||||
let config = self
|
||||
.config
|
||||
.read()
|
||||
.map_err(|_| ExecError::rejection("executor config poisoned"))?
|
||||
.clone();
|
||||
|
||||
// Step 2: Normalise parameters via the selected backend.
|
||||
let backend = backend_for_mode(&request.mode);
|
||||
let stdout_stream = if backend.stream_stdout(&request.mode) {
|
||||
request.stdout_stream.clone()
|
||||
} else {
|
||||
None
|
||||
};
|
||||
request.params = backend
|
||||
.prepare(request.params, &request.mode, &config)
|
||||
.map_err(ExecError::from)?;
|
||||
|
||||
// Step 3: Decide sandbox placement, prompting for approval when needed.
|
||||
let sandbox_decision = select_sandbox(
|
||||
&request,
|
||||
approval_policy,
|
||||
self.approval_cache.snapshot(),
|
||||
&config,
|
||||
session,
|
||||
&context.sub_id,
|
||||
&context.call_id,
|
||||
&context.otel_event_manager,
|
||||
)
|
||||
.await?;
|
||||
if sandbox_decision.record_session_approval {
|
||||
self.approval_cache.insert(request.approval_command.clone());
|
||||
}
|
||||
on_exec_begin().await;
|
||||
// Step 4: Launch the command within the chosen sandbox.
|
||||
let first_attempt = self
|
||||
.spawn(
|
||||
request.params.clone(),
|
||||
sandbox_decision.initial_sandbox,
|
||||
&config,
|
||||
stdout_stream.clone(),
|
||||
)
|
||||
.await;
|
||||
|
||||
// Step 5: Handle sandbox outcomes, optionally escalating to an unsandboxed retry.
|
||||
match first_attempt {
|
||||
Ok(output) => Ok(output),
|
||||
Err(CodexErr::Sandbox(SandboxErr::Timeout { output })) => {
|
||||
Err(CodexErr::Sandbox(SandboxErr::Timeout { output }).into())
|
||||
}
|
||||
Err(CodexErr::Sandbox(error)) => {
|
||||
if sandbox_decision.escalate_on_failure {
|
||||
self.retry_without_sandbox(
|
||||
&request,
|
||||
&config,
|
||||
session,
|
||||
context,
|
||||
stdout_stream,
|
||||
error,
|
||||
)
|
||||
.await
|
||||
} else {
|
||||
let message = sandbox_failure_message(error);
|
||||
Err(ExecError::rejection(message))
|
||||
}
|
||||
}
|
||||
Err(err) => Err(err.into()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Fallback path invoked when a sandboxed run is denied so the user can
|
||||
/// approve rerunning without isolation.
|
||||
async fn retry_without_sandbox(
|
||||
&self,
|
||||
request: &ExecutionRequest,
|
||||
config: &ExecutorConfig,
|
||||
session: &Session,
|
||||
context: &ExecCommandContext,
|
||||
stdout_stream: Option<StdoutStream>,
|
||||
sandbox_error: SandboxErr,
|
||||
) -> Result<ExecToolCallOutput, ExecError> {
|
||||
session
|
||||
.notify_background_event(
|
||||
&context.sub_id,
|
||||
format!("Execution failed: {sandbox_error}"),
|
||||
)
|
||||
.await;
|
||||
let decision = session
|
||||
.request_command_approval(
|
||||
context.sub_id.to_string(),
|
||||
context.call_id.to_string(),
|
||||
request.approval_command.clone(),
|
||||
request.params.cwd.clone(),
|
||||
Some("command failed; retry without sandbox?".to_string()),
|
||||
)
|
||||
.await;
|
||||
|
||||
context.otel_event_manager.tool_decision(
|
||||
&context.tool_name,
|
||||
&context.call_id,
|
||||
decision,
|
||||
ToolDecisionSource::User,
|
||||
);
|
||||
match decision {
|
||||
ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {
|
||||
if matches!(decision, ReviewDecision::ApprovedForSession) {
|
||||
self.approval_cache.insert(request.approval_command.clone());
|
||||
}
|
||||
session
|
||||
.notify_background_event(&context.sub_id, "retrying command without sandbox")
|
||||
.await;
|
||||
|
||||
let retry_output = self
|
||||
.spawn(
|
||||
request.params.clone(),
|
||||
SandboxType::None,
|
||||
config,
|
||||
stdout_stream,
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(retry_output)
|
||||
}
|
||||
ReviewDecision::Denied | ReviewDecision::Abort => {
|
||||
Err(ExecError::denied("exec command rejected by user"))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn spawn(
|
||||
&self,
|
||||
params: ExecParams,
|
||||
sandbox: SandboxType,
|
||||
config: &ExecutorConfig,
|
||||
stdout_stream: Option<StdoutStream>,
|
||||
) -> Result<ExecToolCallOutput, CodexErr> {
|
||||
process_exec_tool_call(
|
||||
params,
|
||||
sandbox,
|
||||
&config.sandbox_policy,
|
||||
&config.sandbox_cwd,
|
||||
&config.codex_exe,
|
||||
stdout_stream,
|
||||
)
|
||||
.await
|
||||
}
|
||||
}
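The `run`/`retry_without_sandbox` pair above implements a "try sandboxed first, then ask the user and retry unsandboxed" loop. Below is a compressed, synchronous sketch of that control flow using invented stand-ins (`run_once`, `ask_user`, `RunError`) rather than the crate's real `Session`/`ExecCommandContext` plumbing:

// Stand-in types; the real code threads Session, ExecCommandContext, etc.
#[derive(Debug)]
enum RunError {
    Sandbox(String), // the sandbox blocked the command
    Other(String),
}

fn run_once(cmd: &[String], sandboxed: bool) -> Result<String, RunError> {
    // Placeholder: pretend every sandboxed run of "write-file" is denied.
    if sandboxed && cmd.first().map(String::as_str) == Some("write-file") {
        Err(RunError::Sandbox("write denied".to_string()))
    } else {
        Ok(format!("ran {cmd:?} (sandboxed: {sandboxed})"))
    }
}

fn ask_user(_cmd: &[String]) -> bool {
    // Placeholder for the interactive approval prompt.
    true
}

fn run_with_escalation(cmd: &[String], escalate_on_failure: bool) -> Result<String, RunError> {
    match run_once(cmd, true) {
        Ok(out) => Ok(out),
        Err(RunError::Sandbox(reason)) if escalate_on_failure && ask_user(cmd) => {
            eprintln!("sandboxed attempt failed ({reason}); retrying without sandbox");
            run_once(cmd, false)
        }
        Err(err) => Err(err),
    }
}

fn main() {
    let cmd = vec!["write-file".to_string(), "out.txt".to_string()];
    println!("{:?}", run_with_escalation(&cmd, true));
}

The real implementation additionally surfaces the failure as a background event before prompting and records `ApprovedForSession` decisions in the approval cache.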
|
||||
|
||||
fn maybe_translate_shell_command(
|
||||
params: ExecParams,
|
||||
session: &Session,
|
||||
use_shell_profile: bool,
|
||||
) -> ExecParams {
|
||||
let should_translate =
|
||||
matches!(session.user_shell(), shell::Shell::PowerShell(_)) || use_shell_profile;
|
||||
|
||||
if should_translate
|
||||
&& let Some(command) = session
|
||||
.user_shell()
|
||||
.format_default_shell_invocation(params.command.clone())
|
||||
{
|
||||
return ExecParams { command, ..params };
|
||||
}
|
||||
|
||||
params
|
||||
}
|
||||
|
||||
fn sandbox_failure_message(error: SandboxErr) -> String {
|
||||
let codex_error = CodexErr::Sandbox(error);
|
||||
let friendly = get_error_message_ui(&codex_error);
|
||||
format!("failed in sandbox: {friendly}")
|
||||
}
|
||||
|
||||
pub(crate) struct ExecutionRequest {
|
||||
pub params: ExecParams,
|
||||
pub approval_command: Vec<String>,
|
||||
pub mode: ExecutionMode,
|
||||
pub stdout_stream: Option<StdoutStream>,
|
||||
pub use_shell_profile: bool,
|
||||
}
|
||||
|
||||
pub(crate) struct NormalizedExecOutput<'a> {
|
||||
borrowed: Option<&'a ExecToolCallOutput>,
|
||||
synthetic: Option<ExecToolCallOutput>,
|
||||
}
|
||||
|
||||
impl<'a> NormalizedExecOutput<'a> {
|
||||
pub(crate) fn event_output(&'a self) -> &'a ExecToolCallOutput {
|
||||
match (self.borrowed, self.synthetic.as_ref()) {
|
||||
(Some(output), _) => output,
|
||||
(None, Some(output)) => output,
|
||||
(None, None) => unreachable!("normalized exec output missing data"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts a raw execution result into a uniform view that always exposes an
|
||||
/// [`ExecToolCallOutput`], synthesizing error output when the command fails
|
||||
/// before producing a response.
|
||||
pub(crate) fn normalize_exec_result(
|
||||
result: &Result<ExecToolCallOutput, ExecError>,
|
||||
) -> NormalizedExecOutput<'_> {
|
||||
match result {
|
||||
Ok(output) => NormalizedExecOutput {
|
||||
borrowed: Some(output),
|
||||
synthetic: None,
|
||||
},
|
||||
Err(ExecError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output }))) => {
|
||||
NormalizedExecOutput {
|
||||
borrowed: Some(output.as_ref()),
|
||||
synthetic: None,
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
let message = match err {
|
||||
ExecError::Function(FunctionCallError::RespondToModel(msg))
|
||||
| ExecError::Function(FunctionCallError::Denied(msg)) => msg.clone(),
|
||||
ExecError::Codex(e) => get_error_message_ui(e),
|
||||
err => err.to_string(),
|
||||
};
|
||||
let synthetic = ExecToolCallOutput {
|
||||
exit_code: -1,
|
||||
stdout: StreamOutput::new(String::new()),
|
||||
stderr: StreamOutput::new(message.clone()),
|
||||
aggregated_output: StreamOutput::new(message),
|
||||
duration: Duration::default(),
|
||||
timed_out: false,
|
||||
};
|
||||
NormalizedExecOutput {
|
||||
borrowed: None,
|
||||
synthetic: Some(synthetic),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
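`NormalizedExecOutput` is essentially a hand-rolled borrow-or-own wrapper. The same shape can be expressed with `std::borrow::Cow`, shown here purely as an illustrative alternative, not as the crate's actual code:

use std::borrow::Cow;

#[derive(Clone, Debug)]
struct Output {
    exit_code: i32,
    text: String,
}

// Borrow the real output when we have one, otherwise synthesize an error payload.
fn normalize(result: &Result<Output, String>) -> Cow<'_, Output> {
    match result {
        Ok(output) => Cow::Borrowed(output),
        Err(message) => Cow::Owned(Output {
            exit_code: -1,
            text: message.clone(),
        }),
    }
}

fn main() {
    let ok: Result<Output, String> = Ok(Output { exit_code: 0, text: "hi".into() });
    let err: Result<Output, String> = Err("boom".into());
    assert_eq!(normalize(&ok).exit_code, 0);
    assert_eq!(normalize(&err).text, "boom");
}

One plausible reason for the hand-rolled struct is that `Cow` would require a `ToOwned`/`Clone` bound on `ExecToolCallOutput`, which the borrowed-plus-synthetic field pair avoids.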
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::EnvVarError;
|
||||
use crate::error::SandboxErr;
|
||||
use crate::exec::StreamOutput;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
fn make_output(text: &str) -> ExecToolCallOutput {
|
||||
ExecToolCallOutput {
|
||||
exit_code: 1,
|
||||
stdout: StreamOutput::new(String::new()),
|
||||
stderr: StreamOutput::new(String::new()),
|
||||
aggregated_output: StreamOutput::new(text.to_string()),
|
||||
duration: Duration::from_millis(123),
|
||||
timed_out: false,
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn normalize_success_borrows() {
|
||||
let out = make_output("ok");
|
||||
let result: Result<ExecToolCallOutput, ExecError> = Ok(out);
|
||||
let normalized = normalize_exec_result(&result);
|
||||
assert_eq!(normalized.event_output().aggregated_output.text, "ok");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn normalize_timeout_borrows_embedded_output() {
|
||||
let out = make_output("timed out payload");
|
||||
let err = CodexErr::Sandbox(SandboxErr::Timeout {
|
||||
output: Box::new(out),
|
||||
});
|
||||
let result: Result<ExecToolCallOutput, ExecError> = Err(ExecError::Codex(err));
|
||||
let normalized = normalize_exec_result(&result);
|
||||
assert_eq!(
|
||||
normalized.event_output().aggregated_output.text,
|
||||
"timed out payload"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sandbox_failure_message_uses_denied_stderr() {
|
||||
let output = ExecToolCallOutput {
|
||||
exit_code: 101,
|
||||
stdout: StreamOutput::new(String::new()),
|
||||
stderr: StreamOutput::new("sandbox stderr".to_string()),
|
||||
aggregated_output: StreamOutput::new(String::new()),
|
||||
duration: Duration::from_millis(10),
|
||||
timed_out: false,
|
||||
};
|
||||
let err = SandboxErr::Denied {
|
||||
output: Box::new(output),
|
||||
};
|
||||
let message = sandbox_failure_message(err);
|
||||
assert_eq!(message, "failed in sandbox: sandbox stderr");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sandbox_failure_message_falls_back_to_aggregated_output() {
|
||||
let output = ExecToolCallOutput {
|
||||
exit_code: 101,
|
||||
stdout: StreamOutput::new(String::new()),
|
||||
stderr: StreamOutput::new(String::new()),
|
||||
aggregated_output: StreamOutput::new("aggregate text".to_string()),
|
||||
duration: Duration::from_millis(10),
|
||||
timed_out: false,
|
||||
};
|
||||
let err = SandboxErr::Denied {
|
||||
output: Box::new(output),
|
||||
};
|
||||
let message = sandbox_failure_message(err);
|
||||
assert_eq!(message, "failed in sandbox: aggregate text");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn normalize_function_error_synthesizes_payload() {
|
||||
let err = FunctionCallError::RespondToModel("boom".to_string());
|
||||
let result: Result<ExecToolCallOutput, ExecError> = Err(ExecError::Function(err));
|
||||
let normalized = normalize_exec_result(&result);
|
||||
assert_eq!(normalized.event_output().aggregated_output.text, "boom");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn normalize_codex_error_synthesizes_user_message() {
|
||||
// Use a simple EnvVar error which formats to a clear message
|
||||
let e = CodexErr::EnvVar(EnvVarError {
|
||||
var: "FOO".to_string(),
|
||||
instructions: Some("set it".to_string()),
|
||||
});
|
||||
let result: Result<ExecToolCallOutput, ExecError> = Err(ExecError::Codex(e));
|
||||
let normalized = normalize_exec_result(&result);
|
||||
assert!(
|
||||
normalized
|
||||
.event_output()
|
||||
.aggregated_output
|
||||
.text
|
||||
.contains("Missing environment variable: `FOO`"),
|
||||
"expected synthesized user-friendly message"
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1,405 +0,0 @@
|
||||
use crate::apply_patch::ApplyPatchExec;
|
||||
use crate::codex::Session;
|
||||
use crate::exec::SandboxType;
|
||||
use crate::executor::ExecutionMode;
|
||||
use crate::executor::ExecutionRequest;
|
||||
use crate::executor::ExecutorConfig;
|
||||
use crate::executor::errors::ExecError;
|
||||
use crate::safety::SafetyCheck;
|
||||
use crate::safety::assess_command_safety;
|
||||
use crate::safety::assess_patch_safety;
|
||||
use codex_otel::otel_event_manager::OtelEventManager;
|
||||
use codex_otel::otel_event_manager::ToolDecisionSource;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use std::collections::HashSet;
|
||||
|
||||
/// Sandbox placement options selected for an execution run, including whether
|
||||
/// to escalate after failures and whether approvals should persist.
|
||||
pub(crate) struct SandboxDecision {
|
||||
pub(crate) initial_sandbox: SandboxType,
|
||||
pub(crate) escalate_on_failure: bool,
|
||||
pub(crate) record_session_approval: bool,
|
||||
}
|
||||
|
||||
impl SandboxDecision {
|
||||
fn auto(sandbox: SandboxType, escalate_on_failure: bool) -> Self {
|
||||
Self {
|
||||
initial_sandbox: sandbox,
|
||||
escalate_on_failure,
|
||||
record_session_approval: false,
|
||||
}
|
||||
}
|
||||
|
||||
fn user_override(record_session_approval: bool) -> Self {
|
||||
Self {
|
||||
initial_sandbox: SandboxType::None,
|
||||
escalate_on_failure: false,
|
||||
record_session_approval,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn should_escalate_on_failure(approval: AskForApproval, sandbox: SandboxType) -> bool {
|
||||
matches!(
|
||||
(approval, sandbox),
|
||||
(
|
||||
AskForApproval::UnlessTrusted | AskForApproval::OnFailure,
|
||||
SandboxType::MacosSeatbelt | SandboxType::LinuxSeccomp
|
||||
)
|
||||
)
|
||||
}
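`should_escalate_on_failure` is a small policy-by-sandbox truth table. Here is a standalone restatement with stand-in enums, just to make the matrix and its edge cases explicit:

// Stand-in enums mirroring the shape of AskForApproval and SandboxType.
#[derive(Clone, Copy)]
enum Approval {
    UnlessTrusted,
    OnFailure,
    OnRequest,
    Never,
}

#[derive(Clone, Copy)]
enum Sandbox {
    MacosSeatbelt,
    LinuxSeccomp,
    None,
}

fn should_escalate(approval: Approval, sandbox: Sandbox) -> bool {
    // Escalate only when the policy allows failure-driven prompts *and* the
    // first attempt actually ran inside a platform sandbox.
    matches!(
        (approval, sandbox),
        (
            Approval::UnlessTrusted | Approval::OnFailure,
            Sandbox::MacosSeatbelt | Sandbox::LinuxSeccomp
        )
    )
}

fn main() {
    assert!(should_escalate(Approval::OnFailure, Sandbox::LinuxSeccomp));
    assert!(!should_escalate(Approval::Never, Sandbox::MacosSeatbelt));
    assert!(!should_escalate(Approval::OnFailure, Sandbox::None));
}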
|
||||
|
||||
/// Determines how a command should be sandboxed, prompting the user when
|
||||
/// policy requires explicit approval.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub async fn select_sandbox(
|
||||
request: &ExecutionRequest,
|
||||
approval_policy: AskForApproval,
|
||||
approval_cache: HashSet<Vec<String>>,
|
||||
config: &ExecutorConfig,
|
||||
session: &Session,
|
||||
sub_id: &str,
|
||||
call_id: &str,
|
||||
otel_event_manager: &OtelEventManager,
|
||||
) -> Result<SandboxDecision, ExecError> {
|
||||
match &request.mode {
|
||||
ExecutionMode::Shell => {
|
||||
select_shell_sandbox(
|
||||
request,
|
||||
approval_policy,
|
||||
approval_cache,
|
||||
config,
|
||||
session,
|
||||
sub_id,
|
||||
call_id,
|
||||
otel_event_manager,
|
||||
)
|
||||
.await
|
||||
}
|
||||
ExecutionMode::ApplyPatch(exec) => {
|
||||
select_apply_patch_sandbox(exec, approval_policy, config)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn select_shell_sandbox(
|
||||
request: &ExecutionRequest,
|
||||
approval_policy: AskForApproval,
|
||||
approved_snapshot: HashSet<Vec<String>>,
|
||||
config: &ExecutorConfig,
|
||||
session: &Session,
|
||||
sub_id: &str,
|
||||
call_id: &str,
|
||||
otel_event_manager: &OtelEventManager,
|
||||
) -> Result<SandboxDecision, ExecError> {
|
||||
let command_for_safety = if request.approval_command.is_empty() {
|
||||
request.params.command.clone()
|
||||
} else {
|
||||
request.approval_command.clone()
|
||||
};
|
||||
|
||||
let safety = assess_command_safety(
|
||||
&command_for_safety,
|
||||
approval_policy,
|
||||
&config.sandbox_policy,
|
||||
&approved_snapshot,
|
||||
request.params.with_escalated_permissions.unwrap_or(false),
|
||||
);
|
||||
|
||||
match safety {
|
||||
SafetyCheck::AutoApprove {
|
||||
sandbox_type,
|
||||
user_explicitly_approved,
|
||||
} => {
|
||||
let mut decision = SandboxDecision::auto(
|
||||
sandbox_type,
|
||||
should_escalate_on_failure(approval_policy, sandbox_type),
|
||||
);
|
||||
if user_explicitly_approved {
|
||||
decision.record_session_approval = true;
|
||||
}
|
||||
let (decision_for_event, source) = if user_explicitly_approved {
|
||||
(ReviewDecision::ApprovedForSession, ToolDecisionSource::User)
|
||||
} else {
|
||||
(ReviewDecision::Approved, ToolDecisionSource::Config)
|
||||
};
|
||||
otel_event_manager.tool_decision("local_shell", call_id, decision_for_event, source);
|
||||
Ok(decision)
|
||||
}
|
||||
SafetyCheck::AskUser => {
|
||||
let decision = session
|
||||
.request_command_approval(
|
||||
sub_id.to_string(),
|
||||
call_id.to_string(),
|
||||
request.approval_command.clone(),
|
||||
request.params.cwd.clone(),
|
||||
request.params.justification.clone(),
|
||||
)
|
||||
.await;
|
||||
|
||||
otel_event_manager.tool_decision(
|
||||
"local_shell",
|
||||
call_id,
|
||||
decision,
|
||||
ToolDecisionSource::User,
|
||||
);
|
||||
match decision {
|
||||
ReviewDecision::Approved => Ok(SandboxDecision::user_override(false)),
|
||||
ReviewDecision::ApprovedForSession => Ok(SandboxDecision::user_override(true)),
|
||||
ReviewDecision::Denied | ReviewDecision::Abort => {
|
||||
Err(ExecError::denied("exec command rejected by user"))
|
||||
}
|
||||
}
|
||||
}
|
||||
SafetyCheck::Reject { reason } => Err(ExecError::rejection(format!(
|
||||
"exec command rejected: {reason}"
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
fn select_apply_patch_sandbox(
|
||||
exec: &ApplyPatchExec,
|
||||
approval_policy: AskForApproval,
|
||||
config: &ExecutorConfig,
|
||||
) -> Result<SandboxDecision, ExecError> {
|
||||
if exec.user_explicitly_approved_this_action {
|
||||
return Ok(SandboxDecision::user_override(false));
|
||||
}
|
||||
|
||||
match assess_patch_safety(
|
||||
&exec.action,
|
||||
approval_policy,
|
||||
&config.sandbox_policy,
|
||||
&config.sandbox_cwd,
|
||||
) {
|
||||
SafetyCheck::AutoApprove { sandbox_type, .. } => Ok(SandboxDecision::auto(
|
||||
sandbox_type,
|
||||
should_escalate_on_failure(approval_policy, sandbox_type),
|
||||
)),
|
||||
SafetyCheck::AskUser => Err(ExecError::rejection(
|
||||
"patch requires approval but none was recorded",
|
||||
)),
|
||||
SafetyCheck::Reject { reason } => {
|
||||
Err(ExecError::rejection(format!("patch rejected: {reason}")))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::codex::make_session_and_context;
|
||||
use crate::exec::ExecParams;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use codex_apply_patch::ApplyPatchAction;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
#[tokio::test]
|
||||
async fn select_apply_patch_user_override_when_explicit() {
|
||||
let (session, ctx) = make_session_and_context();
|
||||
let tmp = tempfile::tempdir().expect("tmp");
|
||||
let p = tmp.path().join("a.txt");
|
||||
let action = ApplyPatchAction::new_add_for_test(&p, "hello".to_string());
|
||||
let exec = ApplyPatchExec {
|
||||
action,
|
||||
user_explicitly_approved_this_action: true,
|
||||
};
|
||||
let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None);
|
||||
let request = ExecutionRequest {
|
||||
params: ExecParams {
|
||||
command: vec!["apply_patch".into()],
|
||||
cwd: std::env::temp_dir(),
|
||||
timeout_ms: None,
|
||||
env: std::collections::HashMap::new(),
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
},
|
||||
approval_command: vec!["apply_patch".into()],
|
||||
mode: ExecutionMode::ApplyPatch(exec),
|
||||
stdout_stream: None,
|
||||
use_shell_profile: false,
|
||||
};
|
||||
let otel_event_manager = ctx.client.get_otel_event_manager();
|
||||
let decision = select_sandbox(
|
||||
&request,
|
||||
AskForApproval::OnRequest,
|
||||
Default::default(),
|
||||
&cfg,
|
||||
&session,
|
||||
"sub",
|
||||
"call",
|
||||
&otel_event_manager,
|
||||
)
|
||||
.await
|
||||
.expect("ok");
|
||||
// Explicit user override runs without sandbox
|
||||
assert_eq!(decision.initial_sandbox, SandboxType::None);
|
||||
assert_eq!(decision.escalate_on_failure, false);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn select_apply_patch_autoapprove_in_danger() {
|
||||
let (session, ctx) = make_session_and_context();
|
||||
let tmp = tempfile::tempdir().expect("tmp");
|
||||
let p = tmp.path().join("a.txt");
|
||||
let action = ApplyPatchAction::new_add_for_test(&p, "hello".to_string());
|
||||
let exec = ApplyPatchExec {
|
||||
action,
|
||||
user_explicitly_approved_this_action: false,
|
||||
};
|
||||
let cfg = ExecutorConfig::new(SandboxPolicy::DangerFullAccess, std::env::temp_dir(), None);
|
||||
let request = ExecutionRequest {
|
||||
params: ExecParams {
|
||||
command: vec!["apply_patch".into()],
|
||||
cwd: std::env::temp_dir(),
|
||||
timeout_ms: None,
|
||||
env: std::collections::HashMap::new(),
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
},
|
||||
approval_command: vec!["apply_patch".into()],
|
||||
mode: ExecutionMode::ApplyPatch(exec),
|
||||
stdout_stream: None,
|
||||
use_shell_profile: false,
|
||||
};
|
||||
let otel_event_manager = ctx.client.get_otel_event_manager();
|
||||
let decision = select_sandbox(
|
||||
&request,
|
||||
AskForApproval::OnRequest,
|
||||
Default::default(),
|
||||
&cfg,
|
||||
&session,
|
||||
"sub",
|
||||
"call",
|
||||
&otel_event_manager,
|
||||
)
|
||||
.await
|
||||
.expect("ok");
|
||||
// On platforms with a sandbox, DangerFullAccess still prefers it
|
||||
let expected = crate::safety::get_platform_sandbox().unwrap_or(SandboxType::None);
|
||||
assert_eq!(decision.initial_sandbox, expected);
|
||||
assert_eq!(decision.escalate_on_failure, false);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn select_apply_patch_requires_approval_on_unless_trusted() {
|
||||
let (session, ctx) = make_session_and_context();
|
||||
let tempdir = tempfile::tempdir().expect("tmpdir");
|
||||
let p = tempdir.path().join("a.txt");
|
||||
let action = ApplyPatchAction::new_add_for_test(&p, "hello".to_string());
|
||||
let exec = ApplyPatchExec {
|
||||
action,
|
||||
user_explicitly_approved_this_action: false,
|
||||
};
|
||||
let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None);
|
||||
let request = ExecutionRequest {
|
||||
params: ExecParams {
|
||||
command: vec!["apply_patch".into()],
|
||||
cwd: std::env::temp_dir(),
|
||||
timeout_ms: None,
|
||||
env: std::collections::HashMap::new(),
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
},
|
||||
approval_command: vec!["apply_patch".into()],
|
||||
mode: ExecutionMode::ApplyPatch(exec),
|
||||
stdout_stream: None,
|
||||
use_shell_profile: false,
|
||||
};
|
||||
let otel_event_manager = ctx.client.get_otel_event_manager();
|
||||
let result = select_sandbox(
|
||||
&request,
|
||||
AskForApproval::UnlessTrusted,
|
||||
Default::default(),
|
||||
&cfg,
|
||||
&session,
|
||||
"sub",
|
||||
"call",
|
||||
&otel_event_manager,
|
||||
)
|
||||
.await;
|
||||
match result {
|
||||
Ok(_) => panic!("expected error"),
|
||||
Err(ExecError::Function(FunctionCallError::RespondToModel(msg))) => {
|
||||
assert!(msg.contains("requires approval"))
|
||||
}
|
||||
Err(other) => panic!("unexpected error: {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn select_shell_autoapprove_in_danger_mode() {
|
||||
let (session, ctx) = make_session_and_context();
|
||||
let cfg = ExecutorConfig::new(SandboxPolicy::DangerFullAccess, std::env::temp_dir(), None);
|
||||
let request = ExecutionRequest {
|
||||
params: ExecParams {
|
||||
command: vec!["some-unknown".into()],
|
||||
cwd: std::env::temp_dir(),
|
||||
timeout_ms: None,
|
||||
env: std::collections::HashMap::new(),
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
},
|
||||
approval_command: vec!["some-unknown".into()],
|
||||
mode: ExecutionMode::Shell,
|
||||
stdout_stream: None,
|
||||
use_shell_profile: false,
|
||||
};
|
||||
let otel_event_manager = ctx.client.get_otel_event_manager();
|
||||
let decision = select_sandbox(
|
||||
&request,
|
||||
AskForApproval::OnRequest,
|
||||
Default::default(),
|
||||
&cfg,
|
||||
&session,
|
||||
"sub",
|
||||
"call",
|
||||
&otel_event_manager,
|
||||
)
|
||||
.await
|
||||
.expect("ok");
|
||||
assert_eq!(decision.initial_sandbox, SandboxType::None);
|
||||
assert_eq!(decision.escalate_on_failure, false);
|
||||
}
|
||||
|
||||
#[cfg(any(target_os = "macos", target_os = "linux"))]
|
||||
#[tokio::test]
|
||||
async fn select_shell_escalates_on_failure_with_platform_sandbox() {
|
||||
let (session, ctx) = make_session_and_context();
|
||||
let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None);
|
||||
let request = ExecutionRequest {
|
||||
params: ExecParams {
|
||||
// Unknown command => untrusted but not flagged dangerous
|
||||
command: vec!["some-unknown".into()],
|
||||
cwd: std::env::temp_dir(),
|
||||
timeout_ms: None,
|
||||
env: std::collections::HashMap::new(),
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
},
|
||||
approval_command: vec!["some-unknown".into()],
|
||||
mode: ExecutionMode::Shell,
|
||||
stdout_stream: None,
|
||||
use_shell_profile: false,
|
||||
};
|
||||
let otel_event_manager = ctx.client.get_otel_event_manager();
|
||||
let decision = select_sandbox(
|
||||
&request,
|
||||
AskForApproval::OnFailure,
|
||||
Default::default(),
|
||||
&cfg,
|
||||
&session,
|
||||
"sub",
|
||||
"call",
|
||||
&otel_event_manager,
|
||||
)
|
||||
.await
|
||||
.expect("ok");
|
||||
// On macOS/Linux we should have a platform sandbox and escalate on failure
|
||||
assert_ne!(decision.initial_sandbox, SandboxType::None);
|
||||
assert_eq!(decision.escalate_on_failure, true);
|
||||
}
|
||||
}
|
||||
@@ -5,6 +5,7 @@ pub enum FunctionCallError {
|
||||
#[error("{0}")]
|
||||
RespondToModel(String),
|
||||
#[error("{0}")]
|
||||
#[allow(dead_code)] // TODO(jif) fix in a follow-up PR
|
||||
Denied(String),
|
||||
#[error("LocalShellCall without call_id or id")]
|
||||
MissingLocalShellCallId,
|
||||
|
||||
@@ -40,7 +40,7 @@ where
|
||||
}
|
||||
|
||||
/// Converts the sandbox policy into the CLI invocation for `codex-linux-sandbox`.
|
||||
fn create_linux_sandbox_command_args(
|
||||
pub(crate) fn create_linux_sandbox_command_args(
|
||||
command: Vec<String>,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
sandbox_policy_cwd: &Path,
|
||||
@@ -56,7 +56,9 @@ fn create_linux_sandbox_command_args(
|
||||
serde_json::to_string(sandbox_policy).expect("Failed to serialize SandboxPolicy to JSON");
|
||||
|
||||
let mut linux_cmd: Vec<String> = vec![
|
||||
"--sandbox-policy-cwd".to_string(),
|
||||
sandbox_policy_cwd,
|
||||
"--sandbox-policy".to_string(),
|
||||
sandbox_policy_json,
|
||||
// Separator so that command arguments starting with `-` are not parsed as
|
||||
// options of the helper itself.
|
||||
|
||||
@@ -25,9 +25,7 @@ pub mod custom_prompts;
|
||||
mod environment_context;
|
||||
pub mod error;
|
||||
pub mod exec;
|
||||
mod exec_command;
|
||||
pub mod exec_env;
|
||||
pub mod executor;
|
||||
pub mod features;
|
||||
mod flags;
|
||||
pub mod git_info;
|
||||
@@ -38,6 +36,7 @@ mod mcp_tool_call;
|
||||
mod message_history;
|
||||
mod model_provider_info;
|
||||
pub mod parse_command;
|
||||
pub mod sandboxing;
|
||||
pub mod token_data;
|
||||
mod truncate;
|
||||
mod unified_exec;
|
||||
@@ -59,7 +58,6 @@ pub use auth::CodexAuth;
|
||||
pub mod default_client;
|
||||
pub mod model_family;
|
||||
mod openai_model_info;
|
||||
mod openai_tools;
|
||||
pub mod project_doc;
|
||||
mod rollout;
|
||||
pub(crate) mod safety;
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
pub use crate::tools::spec::*;
|
||||
@@ -1,4 +1,3 @@
|
||||
use std::collections::HashSet;
|
||||
use std::path::Component;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
@@ -8,8 +7,6 @@ use codex_apply_patch::ApplyPatchFileChange;
|
||||
|
||||
use crate::exec::SandboxType;
|
||||
|
||||
use crate::command_safety::is_dangerous_command::command_might_be_dangerous;
|
||||
use crate::command_safety::is_safe_command::is_known_safe_command;
|
||||
use crate::protocol::AskForApproval;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
|
||||
@@ -48,30 +45,29 @@ pub fn assess_patch_safety(
|
||||
}
|
||||
}
|
||||
|
||||
// Even though the patch *appears* to be constrained to writable paths, it
|
||||
// is possible that paths in the patch are hard links to files outside the
|
||||
// writable roots, so we should still run `apply_patch` in a sandbox in that
|
||||
// case.
|
||||
// Even though the patch appears to be constrained to writable paths, it is
|
||||
// possible that paths in the patch are hard links to files outside the
|
||||
// writable roots, so we should still run `apply_patch` in a sandbox in that case.
|
||||
if is_write_patch_constrained_to_writable_paths(action, sandbox_policy, cwd)
|
||||
|| policy == AskForApproval::OnFailure
|
||||
{
|
||||
// Only auto‑approve when we can actually enforce a sandbox. Otherwise
|
||||
// fall back to asking the user because the patch may touch arbitrary
|
||||
// paths outside the project.
|
||||
match get_platform_sandbox() {
|
||||
Some(sandbox_type) => SafetyCheck::AutoApprove {
|
||||
sandbox_type,
|
||||
if matches!(sandbox_policy, SandboxPolicy::DangerFullAccess) {
|
||||
// DangerFullAccess is intended to bypass sandboxing entirely.
|
||||
SafetyCheck::AutoApprove {
|
||||
sandbox_type: SandboxType::None,
|
||||
user_explicitly_approved: false,
|
||||
},
|
||||
None if sandbox_policy == &SandboxPolicy::DangerFullAccess => {
|
||||
// If the user has explicitly requested DangerFullAccess, then
|
||||
// we can auto-approve even without a sandbox.
|
||||
SafetyCheck::AutoApprove {
|
||||
sandbox_type: SandboxType::None,
|
||||
user_explicitly_approved: false,
|
||||
}
|
||||
}
|
||||
None => SafetyCheck::AskUser,
|
||||
} else {
|
||||
// Only auto‑approve when we can actually enforce a sandbox. Otherwise
|
||||
// fall back to asking the user because the patch may touch arbitrary
|
||||
// paths outside the project.
|
||||
match get_platform_sandbox() {
|
||||
Some(sandbox_type) => SafetyCheck::AutoApprove {
|
||||
sandbox_type,
|
||||
user_explicitly_approved: false,
|
||||
},
|
||||
None => SafetyCheck::AskUser,
|
||||
}
|
||||
}
|
||||
} else if policy == AskForApproval::Never {
|
||||
SafetyCheck::Reject {
|
||||
@@ -83,124 +79,6 @@ pub fn assess_patch_safety(
|
||||
}
|
||||
}
|
||||
|
||||
/// For a command to be run _without_ a sandbox, one of the following must be
|
||||
/// true:
|
||||
///
|
||||
/// - the user has explicitly approved the command
|
||||
/// - the command is on the "known safe" list
|
||||
/// - `DangerFullAccess` was specified and `UnlessTrusted` was not
|
||||
pub fn assess_command_safety(
|
||||
command: &[String],
|
||||
approval_policy: AskForApproval,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
approved: &HashSet<Vec<String>>,
|
||||
with_escalated_permissions: bool,
|
||||
) -> SafetyCheck {
|
||||
// Some commands look dangerous. Even when they would run inside a sandbox,
// ask the user unless they have explicitly approved the command, or reject
// outright if the approval_policy says never to ask.
|
||||
if command_might_be_dangerous(command) && !approved.contains(command) {
|
||||
if approval_policy == AskForApproval::Never {
|
||||
return SafetyCheck::Reject {
|
||||
reason: "dangerous command detected; rejected by user approval settings"
|
||||
.to_string(),
|
||||
};
|
||||
}
|
||||
|
||||
return SafetyCheck::AskUser;
|
||||
}
|
||||
|
||||
// A command is "trusted" because either:
|
||||
// - it belongs to a set of commands we consider "safe" by default, or
|
||||
// - the user has explicitly approved the command for this session
|
||||
//
|
||||
// Currently, whether a command is "trusted" is a simple boolean, but we
|
||||
// should include more metadata on this command test to indicate whether it
|
||||
// should be run inside a sandbox or not. (This could be something the user
|
||||
// defines as part of `execpolicy`.)
|
||||
//
|
||||
// For example, when `is_known_safe_command(command)` returns `true`, it
|
||||
// would probably be fine to run the command in a sandbox, but when
|
||||
// `approved.contains(command)` is `true`, the user may have approved it for
|
||||
// the session _because_ they know it needs to run outside a sandbox.
|
||||
|
||||
if is_known_safe_command(command) || approved.contains(command) {
|
||||
let user_explicitly_approved = approved.contains(command);
|
||||
return SafetyCheck::AutoApprove {
|
||||
sandbox_type: SandboxType::None,
|
||||
user_explicitly_approved,
|
||||
};
|
||||
}
|
||||
|
||||
assess_safety_for_untrusted_command(approval_policy, sandbox_policy, with_escalated_permissions)
|
||||
}
|
||||
|
||||
pub(crate) fn assess_safety_for_untrusted_command(
|
||||
approval_policy: AskForApproval,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
with_escalated_permissions: bool,
|
||||
) -> SafetyCheck {
|
||||
use AskForApproval::*;
|
||||
use SandboxPolicy::*;
|
||||
|
||||
match (approval_policy, sandbox_policy) {
|
||||
(UnlessTrusted, _) => {
|
||||
// Even though the user may have opted into DangerFullAccess,
|
||||
// they also requested that we ask for approval for untrusted
|
||||
// commands.
|
||||
SafetyCheck::AskUser
|
||||
}
|
||||
(OnFailure, DangerFullAccess)
|
||||
| (Never, DangerFullAccess)
|
||||
| (OnRequest, DangerFullAccess) => SafetyCheck::AutoApprove {
|
||||
sandbox_type: SandboxType::None,
|
||||
user_explicitly_approved: false,
|
||||
},
|
||||
(OnRequest, ReadOnly) | (OnRequest, WorkspaceWrite { .. }) => {
|
||||
if with_escalated_permissions {
|
||||
SafetyCheck::AskUser
|
||||
} else {
|
||||
match get_platform_sandbox() {
|
||||
Some(sandbox_type) => SafetyCheck::AutoApprove {
|
||||
sandbox_type,
|
||||
user_explicitly_approved: false,
|
||||
},
|
||||
// Fall back to asking since the command is untrusted and
|
||||
// we do not have a sandbox available
|
||||
None => SafetyCheck::AskUser,
|
||||
}
|
||||
}
|
||||
}
|
||||
(Never, ReadOnly)
|
||||
| (Never, WorkspaceWrite { .. })
|
||||
| (OnFailure, ReadOnly)
|
||||
| (OnFailure, WorkspaceWrite { .. }) => {
|
||||
match get_platform_sandbox() {
|
||||
Some(sandbox_type) => SafetyCheck::AutoApprove {
|
||||
sandbox_type,
|
||||
user_explicitly_approved: false,
|
||||
},
|
||||
None => {
|
||||
if matches!(approval_policy, OnFailure) {
|
||||
// Since the command is not trusted, even though the
|
||||
// user has requested to only ask for approval on
|
||||
// failure, we will ask the user because no sandbox is
|
||||
// available.
|
||||
SafetyCheck::AskUser
|
||||
} else {
|
||||
// We are in non-interactive mode and lack approval, so
|
||||
// all we can do is reject the command.
|
||||
SafetyCheck::Reject {
|
||||
reason: "auto-rejected because command is not on trusted list"
|
||||
.to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_platform_sandbox() -> Option<SandboxType> {
|
||||
if cfg!(target_os = "macos") {
|
||||
Some(SandboxType::MacosSeatbelt)
|
||||
@@ -339,101 +217,4 @@ mod tests {
|
||||
&cwd,
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_request_escalated_privileges() {
|
||||
// Should not be a trusted command
|
||||
let command = vec!["git commit".to_string()];
|
||||
let approval_policy = AskForApproval::OnRequest;
|
||||
let sandbox_policy = SandboxPolicy::ReadOnly;
|
||||
let approved: HashSet<Vec<String>> = HashSet::new();
|
||||
let request_escalated_privileges = true;
|
||||
|
||||
let safety_check = assess_command_safety(
|
||||
&command,
|
||||
approval_policy,
|
||||
&sandbox_policy,
|
||||
&approved,
|
||||
request_escalated_privileges,
|
||||
);
|
||||
|
||||
assert_eq!(safety_check, SafetyCheck::AskUser);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn dangerous_command_allowed_if_explicitly_approved() {
|
||||
let command = vec!["git".to_string(), "reset".to_string(), "--hard".to_string()];
|
||||
let approval_policy = AskForApproval::OnRequest;
|
||||
let sandbox_policy = SandboxPolicy::ReadOnly;
|
||||
let mut approved: HashSet<Vec<String>> = HashSet::new();
|
||||
approved.insert(command.clone());
|
||||
let request_escalated_privileges = false;
|
||||
|
||||
let safety_check = assess_command_safety(
|
||||
&command,
|
||||
approval_policy,
|
||||
&sandbox_policy,
|
||||
&approved,
|
||||
request_escalated_privileges,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
safety_check,
|
||||
SafetyCheck::AutoApprove {
|
||||
sandbox_type: SandboxType::None,
|
||||
user_explicitly_approved: true,
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn dangerous_command_not_allowed_if_not_explicitly_approved() {
|
||||
let command = vec!["git".to_string(), "reset".to_string(), "--hard".to_string()];
|
||||
let approval_policy = AskForApproval::Never;
|
||||
let sandbox_policy = SandboxPolicy::ReadOnly;
|
||||
let approved: HashSet<Vec<String>> = HashSet::new();
|
||||
let request_escalated_privileges = false;
|
||||
|
||||
let safety_check = assess_command_safety(
|
||||
&command,
|
||||
approval_policy,
|
||||
&sandbox_policy,
|
||||
&approved,
|
||||
request_escalated_privileges,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
safety_check,
|
||||
SafetyCheck::Reject {
|
||||
reason: "dangerous command detected; rejected by user approval settings"
|
||||
.to_string(),
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_request_escalated_privileges_no_sandbox_fallback() {
|
||||
let command = vec!["git".to_string(), "commit".to_string()];
|
||||
let approval_policy = AskForApproval::OnRequest;
|
||||
let sandbox_policy = SandboxPolicy::ReadOnly;
|
||||
let approved: HashSet<Vec<String>> = HashSet::new();
|
||||
let request_escalated_privileges = false;
|
||||
|
||||
let safety_check = assess_command_safety(
|
||||
&command,
|
||||
approval_policy,
|
||||
&sandbox_policy,
|
||||
&approved,
|
||||
request_escalated_privileges,
|
||||
);
|
||||
|
||||
let expected = match get_platform_sandbox() {
|
||||
Some(sandbox_type) => SafetyCheck::AutoApprove {
|
||||
sandbox_type,
|
||||
user_explicitly_approved: false,
|
||||
},
|
||||
None => SafetyCheck::AskUser,
|
||||
};
|
||||
assert_eq!(safety_check, expected);
|
||||
}
|
||||
}
|
||||
|
||||
codex-rs/core/src/sandboxing/mod.rs (new file, 170 lines)
@@ -0,0 +1,170 @@
|
||||
/*
|
||||
Module: sandboxing
|
||||
|
||||
Build platform wrappers and produce ExecEnv for execution. Owns low‑level
|
||||
sandbox placement and transformation of portable CommandSpec into a
|
||||
ready‑to‑spawn environment.
|
||||
*/
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::exec::SandboxType;
|
||||
use crate::exec::StdoutStream;
|
||||
use crate::exec::execute_exec_env;
|
||||
use crate::landlock::create_linux_sandbox_command_args;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use crate::seatbelt::MACOS_PATH_TO_SEATBELT_EXECUTABLE;
|
||||
use crate::seatbelt::create_seatbelt_command_args;
|
||||
use crate::spawn::CODEX_SANDBOX_ENV_VAR;
|
||||
use crate::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
|
||||
use crate::tools::sandboxing::SandboxablePreference;
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct CommandSpec {
|
||||
pub program: String,
|
||||
pub args: Vec<String>,
|
||||
pub cwd: PathBuf,
|
||||
pub env: HashMap<String, String>,
|
||||
pub timeout_ms: Option<u64>,
|
||||
pub with_escalated_permissions: Option<bool>,
|
||||
pub justification: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct ExecEnv {
|
||||
pub command: Vec<String>,
|
||||
pub cwd: PathBuf,
|
||||
pub env: HashMap<String, String>,
|
||||
pub timeout_ms: Option<u64>,
|
||||
pub sandbox: SandboxType,
|
||||
pub with_escalated_permissions: Option<bool>,
|
||||
pub justification: Option<String>,
|
||||
pub arg0: Option<String>,
|
||||
}
|
||||
|
||||
pub enum SandboxPreference {
|
||||
Auto,
|
||||
Require,
|
||||
Forbid,
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub(crate) enum SandboxTransformError {
|
||||
#[error("missing codex-linux-sandbox executable path")]
|
||||
MissingLinuxSandboxExecutable,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct SandboxManager;
|
||||
|
||||
impl SandboxManager {
|
||||
pub fn new() -> Self {
|
||||
Self
|
||||
}
|
||||
|
||||
pub(crate) fn select_initial(
|
||||
&self,
|
||||
policy: &SandboxPolicy,
|
||||
pref: SandboxablePreference,
|
||||
) -> SandboxType {
|
||||
match pref {
|
||||
SandboxablePreference::Forbid => SandboxType::None,
|
||||
SandboxablePreference::Require => {
|
||||
#[cfg(target_os = "macos")]
|
||||
{
|
||||
return SandboxType::MacosSeatbelt;
|
||||
}
|
||||
#[cfg(target_os = "linux")]
|
||||
{
|
||||
return SandboxType::LinuxSeccomp;
|
||||
}
|
||||
#[allow(unreachable_code)]
|
||||
SandboxType::None
|
||||
}
|
||||
SandboxablePreference::Auto => match policy {
|
||||
SandboxPolicy::DangerFullAccess => SandboxType::None,
|
||||
#[cfg(target_os = "macos")]
|
||||
_ => SandboxType::MacosSeatbelt,
|
||||
#[cfg(target_os = "linux")]
|
||||
_ => SandboxType::LinuxSeccomp,
|
||||
#[cfg(not(any(target_os = "macos", target_os = "linux")))]
|
||||
_ => SandboxType::None,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn transform(
|
||||
&self,
|
||||
spec: &CommandSpec,
|
||||
policy: &SandboxPolicy,
|
||||
sandbox: SandboxType,
|
||||
sandbox_policy_cwd: &Path,
|
||||
codex_linux_sandbox_exe: Option<&PathBuf>,
|
||||
) -> Result<ExecEnv, SandboxTransformError> {
|
||||
let mut env = spec.env.clone();
|
||||
if !policy.has_full_network_access() {
|
||||
env.insert(
|
||||
CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR.to_string(),
|
||||
"1".to_string(),
|
||||
);
|
||||
}
|
||||
|
||||
let mut command = Vec::with_capacity(1 + spec.args.len());
|
||||
command.push(spec.program.clone());
|
||||
command.extend(spec.args.iter().cloned());
|
||||
|
||||
let (command, sandbox_env, arg0_override) = match sandbox {
|
||||
SandboxType::None => (command, HashMap::new(), None),
|
||||
SandboxType::MacosSeatbelt => {
|
||||
let mut seatbelt_env = HashMap::new();
|
||||
seatbelt_env.insert(CODEX_SANDBOX_ENV_VAR.to_string(), "seatbelt".to_string());
|
||||
let mut args =
|
||||
create_seatbelt_command_args(command.clone(), policy, sandbox_policy_cwd);
|
||||
let mut full_command = Vec::with_capacity(1 + args.len());
|
||||
full_command.push(MACOS_PATH_TO_SEATBELT_EXECUTABLE.to_string());
|
||||
full_command.append(&mut args);
|
||||
(full_command, seatbelt_env, None)
|
||||
}
|
||||
SandboxType::LinuxSeccomp => {
|
||||
let exe = codex_linux_sandbox_exe
|
||||
.ok_or(SandboxTransformError::MissingLinuxSandboxExecutable)?;
|
||||
let mut args =
|
||||
create_linux_sandbox_command_args(command.clone(), policy, sandbox_policy_cwd);
|
||||
let mut full_command = Vec::with_capacity(1 + args.len());
|
||||
full_command.push(exe.to_string_lossy().to_string());
|
||||
full_command.append(&mut args);
|
||||
(
|
||||
full_command,
|
||||
HashMap::new(),
|
||||
Some("codex-linux-sandbox".to_string()),
|
||||
)
|
||||
}
|
||||
};
|
||||
|
||||
env.extend(sandbox_env);
|
||||
|
||||
Ok(ExecEnv {
|
||||
command,
|
||||
cwd: spec.cwd.clone(),
|
||||
env,
|
||||
timeout_ms: spec.timeout_ms,
|
||||
sandbox,
|
||||
with_escalated_permissions: spec.with_escalated_permissions,
|
||||
justification: spec.justification.clone(),
|
||||
arg0: arg0_override,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn denied(&self, sandbox: SandboxType, out: &ExecToolCallOutput) -> bool {
|
||||
crate::exec::is_likely_sandbox_denied(sandbox, out)
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn execute_env(
|
||||
env: &ExecEnv,
|
||||
policy: &SandboxPolicy,
|
||||
stdout_stream: Option<StdoutStream>,
|
||||
) -> crate::error::Result<ExecToolCallOutput> {
|
||||
execute_exec_env(env.clone(), policy, stdout_stream).await
|
||||
}
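The `transform` step above turns a portable `CommandSpec` into a ready-to-spawn `ExecEnv` by prepending a platform sandbox wrapper and layering marker environment variables. Below is a standalone sketch of that wrapping with a hypothetical `wrap_in_sandbox` helper; the policy text and the `SANDBOX` variable name are illustrative placeholders, not the real arguments:

use std::collections::HashMap;

#[derive(Clone, Copy)]
enum Sandbox {
    None,
    Seatbelt,
}

struct ExecEnv {
    command: Vec<String>,
    env: HashMap<String, String>,
}

// Wrap a portable command for the chosen sandbox. The inline policy and the
// "SANDBOX" marker variable are illustrative, not the crate's real values.
fn wrap_in_sandbox(mut command: Vec<String>, sandbox: Sandbox) -> ExecEnv {
    let mut env = HashMap::new();
    match sandbox {
        Sandbox::None => {}
        Sandbox::Seatbelt => {
            env.insert("SANDBOX".to_string(), "seatbelt".to_string());
            let mut wrapped = vec![
                "/usr/bin/sandbox-exec".to_string(),
                "-p".to_string(),
                "(version 1)".to_string(), // placeholder policy text
                "--".to_string(),
            ];
            wrapped.append(&mut command);
            command = wrapped;
        }
    }
    ExecEnv { command, env }
}

fn main() {
    let env = wrap_in_sandbox(vec!["ls".to_string(), "-la".to_string()], Sandbox::Seatbelt);
    println!("{:?} with env {:?}", env.command, env.env);
}

The real `transform` also carries `cwd`, `timeout_ms`, and the escalation fields through unchanged, and returns an error when the Linux sandbox helper path is missing.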
|
||||
@@ -14,7 +14,7 @@ const MACOS_SEATBELT_BASE_POLICY: &str = include_str!("seatbelt_base_policy.sbpl
|
||||
/// to defend against an attacker trying to inject a malicious version on the
|
||||
/// PATH. If /usr/bin/sandbox-exec has been tampered with, then the attacker
|
||||
/// already has root access.
|
||||
const MACOS_PATH_TO_SEATBELT_EXECUTABLE: &str = "/usr/bin/sandbox-exec";
|
||||
pub(crate) const MACOS_PATH_TO_SEATBELT_EXECUTABLE: &str = "/usr/bin/sandbox-exec";
|
||||
|
||||
pub async fn spawn_command_under_seatbelt(
|
||||
command: Vec<String>,
|
||||
@@ -39,7 +39,7 @@ pub async fn spawn_command_under_seatbelt(
|
||||
.await
|
||||
}
|
||||
|
||||
fn create_seatbelt_command_args(
|
||||
pub(crate) fn create_seatbelt_command_args(
|
||||
command: Vec<String>,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
sandbox_policy_cwd: &Path,
|
||||
|
||||
@@ -347,6 +347,7 @@ mod tests {
|
||||
)]),
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
arg0: None,
|
||||
},
|
||||
SandboxType::None,
|
||||
&SandboxPolicy::DangerFullAccess,
|
||||
@@ -455,6 +456,7 @@ mod macos_tests {
|
||||
)]),
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
arg0: None,
|
||||
},
|
||||
SandboxType::None,
|
||||
&SandboxPolicy::DangerFullAccess,
|
||||
|
||||
@@ -2,9 +2,8 @@ use std::sync::Arc;
|
||||
|
||||
use crate::AuthManager;
|
||||
use crate::RolloutRecorder;
|
||||
use crate::exec_command::ExecSessionManager;
|
||||
use crate::executor::Executor;
|
||||
use crate::mcp_connection_manager::McpConnectionManager;
|
||||
use crate::tools::sandboxing::ApprovalStore;
|
||||
use crate::unified_exec::UnifiedExecSessionManager;
|
||||
use crate::user_notification::UserNotifier;
|
||||
use codex_otel::otel_event_manager::OtelEventManager;
|
||||
@@ -12,13 +11,12 @@ use tokio::sync::Mutex;
|
||||
|
||||
pub(crate) struct SessionServices {
|
||||
pub(crate) mcp_connection_manager: McpConnectionManager,
|
||||
pub(crate) session_manager: ExecSessionManager,
|
||||
pub(crate) unified_exec_manager: UnifiedExecSessionManager,
|
||||
pub(crate) notifier: UserNotifier,
|
||||
pub(crate) rollout: Mutex<Option<RolloutRecorder>>,
|
||||
pub(crate) user_shell: crate::shell::Shell,
|
||||
pub(crate) show_raw_agent_reasoning: bool,
|
||||
pub(crate) executor: Executor,
|
||||
pub(crate) auth_manager: Arc<AuthManager>,
|
||||
pub(crate) otel_event_manager: OtelEventManager,
|
||||
pub(crate) tool_approvals: Mutex<ApprovalStore>,
|
||||
}
|
||||
|
||||
@@ -232,6 +232,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
#[allow(dead_code)]
|
||||
pub(crate) struct ExecCommandContext {
|
||||
pub(crate) sub_id: String,
|
||||
pub(crate) call_id: String,
|
||||
@@ -243,6 +244,7 @@ pub(crate) struct ExecCommandContext {
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
#[allow(dead_code)]
|
||||
pub(crate) struct ApplyPatchCommandContext {
|
||||
pub(crate) user_explicitly_approved_this_action: bool,
|
||||
pub(crate) changes: HashMap<PathBuf, FileChange>,
|
||||
|
||||
codex-rs/core/src/tools/events.rs (new file, 235 lines)
@@ -0,0 +1,235 @@
|
||||
use crate::codex::Session;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::parse_command::parse_command;
|
||||
use crate::protocol::Event;
|
||||
use crate::protocol::EventMsg;
|
||||
use crate::protocol::ExecCommandBeginEvent;
|
||||
use crate::protocol::ExecCommandEndEvent;
|
||||
use crate::protocol::FileChange;
|
||||
use crate::protocol::PatchApplyBeginEvent;
|
||||
use crate::protocol::PatchApplyEndEvent;
|
||||
use crate::protocol::TurnDiffEvent;
|
||||
use crate::tools::context::SharedTurnDiffTracker;
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
use std::time::Duration;
|
||||
|
||||
use super::format_exec_output;
|
||||
use super::format_exec_output_str;
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub(crate) struct ToolEventCtx<'a> {
|
||||
pub session: &'a Session,
|
||||
pub sub_id: &'a str,
|
||||
pub call_id: &'a str,
|
||||
pub turn_diff_tracker: Option<&'a SharedTurnDiffTracker>,
|
||||
}
|
||||
|
||||
impl<'a> ToolEventCtx<'a> {
|
||||
pub fn new(
|
||||
session: &'a Session,
|
||||
sub_id: &'a str,
|
||||
call_id: &'a str,
|
||||
turn_diff_tracker: Option<&'a SharedTurnDiffTracker>,
|
||||
) -> Self {
|
||||
Self {
|
||||
session,
|
||||
sub_id,
|
||||
call_id,
|
||||
turn_diff_tracker,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) enum ToolEventStage {
|
||||
Begin,
|
||||
Success(ExecToolCallOutput),
|
||||
Failure(ToolEventFailure),
|
||||
}
|
||||
|
||||
pub(crate) enum ToolEventFailure {
|
||||
Output(ExecToolCallOutput),
|
||||
Message(String),
|
||||
}
|
||||
// Concrete, allocation-free emitter: avoids trait objects and boxed futures.
|
||||
pub(crate) enum ToolEmitter {
|
||||
Shell {
|
||||
command: Vec<String>,
|
||||
cwd: PathBuf,
|
||||
},
|
||||
ApplyPatch {
|
||||
changes: HashMap<PathBuf, FileChange>,
|
||||
auto_approved: bool,
|
||||
},
|
||||
}
|
||||
|
||||
impl ToolEmitter {
|
||||
pub fn shell(command: Vec<String>, cwd: PathBuf) -> Self {
|
||||
Self::Shell { command, cwd }
|
||||
}
|
||||
|
||||
pub fn apply_patch(changes: HashMap<PathBuf, FileChange>, auto_approved: bool) -> Self {
|
||||
Self::ApplyPatch {
|
||||
changes,
|
||||
auto_approved,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn emit(&self, ctx: ToolEventCtx<'_>, stage: ToolEventStage) {
|
||||
match (self, stage) {
|
||||
(Self::Shell { command, cwd }, ToolEventStage::Begin) => {
|
||||
ctx.session
|
||||
.send_event(Event {
|
||||
id: ctx.sub_id.to_string(),
|
||||
msg: EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
|
||||
call_id: ctx.call_id.to_string(),
|
||||
command: command.clone(),
|
||||
cwd: cwd.clone(),
|
||||
parsed_cmd: parse_command(command),
|
||||
}),
|
||||
})
|
||||
.await;
|
||||
}
|
||||
(Self::Shell { .. }, ToolEventStage::Success(output)) => {
|
||||
emit_exec_end(
|
||||
ctx,
|
||||
output.stdout.text.clone(),
|
||||
output.stderr.text.clone(),
|
||||
output.aggregated_output.text.clone(),
|
||||
output.exit_code,
|
||||
output.duration,
|
||||
format_exec_output_str(&output),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
(Self::Shell { .. }, ToolEventStage::Failure(ToolEventFailure::Output(output))) => {
|
||||
emit_exec_end(
|
||||
ctx,
|
||||
output.stdout.text.clone(),
|
||||
output.stderr.text.clone(),
|
||||
output.aggregated_output.text.clone(),
|
||||
output.exit_code,
|
||||
output.duration,
|
||||
format_exec_output_str(&output),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
(Self::Shell { .. }, ToolEventStage::Failure(ToolEventFailure::Message(message))) => {
|
||||
emit_exec_end(
|
||||
ctx,
|
||||
String::new(),
|
||||
(*message).to_string(),
|
||||
(*message).to_string(),
|
||||
-1,
|
||||
Duration::ZERO,
|
||||
format_exec_output(&message),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
(
|
||||
Self::ApplyPatch {
|
||||
changes,
|
||||
auto_approved,
|
||||
},
|
||||
ToolEventStage::Begin,
|
||||
) => {
|
||||
if let Some(tracker) = ctx.turn_diff_tracker {
|
||||
let mut guard = tracker.lock().await;
|
||||
guard.on_patch_begin(changes);
|
||||
}
|
||||
ctx.session
|
||||
.send_event(Event {
|
||||
id: ctx.sub_id.to_string(),
|
||||
msg: EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
|
||||
call_id: ctx.call_id.to_string(),
|
||||
auto_approved: *auto_approved,
|
||||
changes: changes.clone(),
|
||||
}),
|
||||
})
|
||||
.await;
|
||||
}
|
||||
(Self::ApplyPatch { .. }, ToolEventStage::Success(output)) => {
|
||||
emit_patch_end(
|
||||
ctx,
|
||||
output.stdout.text.clone(),
|
||||
output.stderr.text.clone(),
|
||||
output.exit_code == 0,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
(
|
||||
Self::ApplyPatch { .. },
|
||||
ToolEventStage::Failure(ToolEventFailure::Output(output)),
|
||||
) => {
|
||||
emit_patch_end(
|
||||
ctx,
|
||||
output.stdout.text.clone(),
|
||||
output.stderr.text.clone(),
|
||||
output.exit_code == 0,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
(
|
||||
Self::ApplyPatch { .. },
|
||||
ToolEventStage::Failure(ToolEventFailure::Message(message)),
|
||||
) => {
|
||||
emit_patch_end(ctx, String::new(), (*message).to_string(), false).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn emit_exec_end(
|
||||
ctx: ToolEventCtx<'_>,
|
||||
stdout: String,
|
||||
stderr: String,
|
||||
aggregated_output: String,
|
||||
exit_code: i32,
|
||||
duration: Duration,
|
||||
formatted_output: String,
|
||||
) {
|
||||
ctx.session
|
||||
.send_event(Event {
|
||||
id: ctx.sub_id.to_string(),
|
||||
msg: EventMsg::ExecCommandEnd(ExecCommandEndEvent {
|
||||
call_id: ctx.call_id.to_string(),
|
||||
stdout,
|
||||
stderr,
|
||||
aggregated_output,
|
||||
exit_code,
|
||||
duration,
|
||||
formatted_output,
|
||||
}),
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
async fn emit_patch_end(ctx: ToolEventCtx<'_>, stdout: String, stderr: String, success: bool) {
|
||||
ctx.session
|
||||
.send_event(Event {
|
||||
id: ctx.sub_id.to_string(),
|
||||
msg: EventMsg::PatchApplyEnd(PatchApplyEndEvent {
|
||||
call_id: ctx.call_id.to_string(),
|
||||
stdout,
|
||||
stderr,
|
||||
success,
|
||||
}),
|
||||
})
|
||||
.await;
|
||||
|
||||
if let Some(tracker) = ctx.turn_diff_tracker {
|
||||
let unified_diff = {
|
||||
let mut guard = tracker.lock().await;
|
||||
guard.get_unified_diff()
|
||||
};
|
||||
if let Ok(Some(unified_diff)) = unified_diff {
|
||||
ctx.session
|
||||
.send_event(Event {
|
||||
id: ctx.sub_id.to_string(),
|
||||
msg: EventMsg::TurnDiff(TurnDiffEvent { unified_diff }),
|
||||
})
|
||||
.await;
|
||||
}
|
||||
}
|
||||
}
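Note: the emitter above pairs one Begin event with exactly one Success/Failure end event per tool call. The following is an illustrative, self-contained Rust sketch of that lifecycle; the types are simplified stand-ins, not the crate's Session/Event/ExecToolCallOutput types.

// Simplified stand-in for the Begin -> Success/Failure pairing enforced by ToolEmitter.
use std::time::Duration;

#[derive(Debug)]
enum Stage {
    Begin,
    Success { exit_code: i32, duration: Duration },
    Failure { message: String },
}

fn emit(call_id: &str, stage: Stage) {
    // In the real code this becomes an ExecCommandBegin/End or PatchApplyBegin/End
    // protocol event sent over the session's event channel.
    println!("[{call_id}] {stage:?}");
}

fn main() {
    let call_id = "call-1";
    emit(call_id, Stage::Begin);
    // ... run the tool ...
    let ok = true; // pretend the command succeeded
    if ok {
        emit(call_id, Stage::Success { exit_code: 0, duration: Duration::from_millis(12) });
    } else {
        emit(call_id, Stage::Failure { message: "sandbox denied".into() });
    }
}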
|
||||
@@ -8,7 +8,6 @@ use crate::client_common::tools::ResponsesApiTool;
|
||||
use crate::client_common::tools::ToolSpec;
|
||||
use crate::exec::ExecParams;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::openai_tools::JsonSchema;
|
||||
use crate::tools::context::ToolInvocation;
|
||||
use crate::tools::context::ToolOutput;
|
||||
use crate::tools::context::ToolPayload;
|
||||
@@ -16,6 +15,7 @@ use crate::tools::handle_container_exec_with_params;
|
||||
use crate::tools::registry::ToolHandler;
|
||||
use crate::tools::registry::ToolKind;
|
||||
use crate::tools::spec::ApplyPatchToolArgs;
|
||||
use crate::tools::spec::JsonSchema;
|
||||
use async_trait::async_trait;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
@@ -72,6 +72,7 @@ impl ToolHandler for ApplyPatchHandler {
|
||||
env: HashMap::new(),
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
arg0: None,
|
||||
};
|
||||
|
||||
let content = handle_container_exec_with_params(
|
||||
|
||||
@@ -1,68 +0,0 @@
|
||||
use async_trait::async_trait;
|
||||
|
||||
use crate::exec_command::EXEC_COMMAND_TOOL_NAME;
|
||||
use crate::exec_command::ExecCommandParams;
|
||||
use crate::exec_command::WRITE_STDIN_TOOL_NAME;
|
||||
use crate::exec_command::WriteStdinParams;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::tools::context::ToolInvocation;
|
||||
use crate::tools::context::ToolOutput;
|
||||
use crate::tools::context::ToolPayload;
|
||||
use crate::tools::registry::ToolHandler;
|
||||
use crate::tools::registry::ToolKind;
|
||||
|
||||
pub struct ExecStreamHandler;
|
||||
|
||||
#[async_trait]
|
||||
impl ToolHandler for ExecStreamHandler {
|
||||
fn kind(&self) -> ToolKind {
|
||||
ToolKind::Function
|
||||
}
|
||||
|
||||
async fn handle(&self, invocation: ToolInvocation) -> Result<ToolOutput, FunctionCallError> {
|
||||
let ToolInvocation {
|
||||
session,
|
||||
tool_name,
|
||||
payload,
|
||||
..
|
||||
} = invocation;
|
||||
|
||||
let arguments = match payload {
|
||||
ToolPayload::Function { arguments } => arguments,
|
||||
_ => {
|
||||
return Err(FunctionCallError::RespondToModel(
|
||||
"exec_stream handler received unsupported payload".to_string(),
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
let content = match tool_name.as_str() {
|
||||
EXEC_COMMAND_TOOL_NAME => {
|
||||
let params: ExecCommandParams = serde_json::from_str(&arguments).map_err(|e| {
|
||||
FunctionCallError::RespondToModel(format!(
|
||||
"failed to parse function arguments: {e:?}"
|
||||
))
|
||||
})?;
|
||||
session.handle_exec_command_tool(params).await?
|
||||
}
|
||||
WRITE_STDIN_TOOL_NAME => {
|
||||
let params: WriteStdinParams = serde_json::from_str(&arguments).map_err(|e| {
|
||||
FunctionCallError::RespondToModel(format!(
|
||||
"failed to parse function arguments: {e:?}"
|
||||
))
|
||||
})?;
|
||||
session.handle_write_stdin_tool(params).await?
|
||||
}
|
||||
_ => {
|
||||
return Err(FunctionCallError::RespondToModel(format!(
|
||||
"exec_stream handler does not support tool {tool_name}"
|
||||
)));
|
||||
}
|
||||
};
|
||||
|
||||
Ok(ToolOutput::Function {
|
||||
content,
|
||||
success: Some(true),
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,4 @@
|
||||
pub mod apply_patch;
|
||||
mod exec_stream;
|
||||
mod grep_files;
|
||||
mod list_dir;
|
||||
mod mcp;
|
||||
@@ -14,7 +13,6 @@ mod view_image;
|
||||
pub use plan::PLAN_TOOL;
|
||||
|
||||
pub use apply_patch::ApplyPatchHandler;
|
||||
pub use exec_stream::ExecStreamHandler;
|
||||
pub use grep_files::GrepFilesHandler;
|
||||
pub use list_dir::ListDirHandler;
|
||||
pub use mcp::McpHandler;
|
||||
|
||||
@@ -2,12 +2,12 @@ use crate::client_common::tools::ResponsesApiTool;
|
||||
use crate::client_common::tools::ToolSpec;
|
||||
use crate::codex::Session;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::openai_tools::JsonSchema;
|
||||
use crate::tools::context::ToolInvocation;
|
||||
use crate::tools::context::ToolOutput;
|
||||
use crate::tools::context::ToolPayload;
|
||||
use crate::tools::registry::ToolHandler;
|
||||
use crate::tools::registry::ToolKind;
|
||||
use crate::tools::spec::JsonSchema;
|
||||
use async_trait::async_trait;
|
||||
use codex_protocol::plan_tool::UpdatePlanArgs;
|
||||
use codex_protocol::protocol::Event;
|
||||
|
||||
@@ -24,6 +24,7 @@ impl ShellHandler {
|
||||
env: create_env(&turn_context.shell_environment_policy),
|
||||
with_escalated_permissions: params.with_escalated_permissions,
|
||||
justification: params.justification,
|
||||
arg0: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,7 +35,13 @@ impl ToolHandler for UnifiedExecHandler {
|
||||
|
||||
async fn handle(&self, invocation: ToolInvocation) -> Result<ToolOutput, FunctionCallError> {
|
||||
let ToolInvocation {
|
||||
session, payload, ..
|
||||
session,
|
||||
turn,
|
||||
sub_id,
|
||||
call_id,
|
||||
tool_name: _tool_name,
|
||||
payload,
|
||||
..
|
||||
} = invocation;
|
||||
|
||||
let args = match payload {
|
||||
@@ -73,13 +79,23 @@ impl ToolHandler for UnifiedExecHandler {
|
||||
};
|
||||
|
||||
let request = UnifiedExecRequest {
|
||||
session_id: parsed_session_id,
|
||||
input_chunks: &input,
|
||||
timeout_ms,
|
||||
};
|
||||
|
||||
let value = session
|
||||
.run_unified_exec_request(request)
|
||||
.services
|
||||
.unified_exec_manager
|
||||
.handle_request(
|
||||
request,
|
||||
crate::unified_exec::UnifiedExecContext {
|
||||
session: &session,
|
||||
turn: turn.as_ref(),
|
||||
sub_id: &sub_id,
|
||||
call_id: &call_id,
|
||||
session_id: parsed_session_id,
|
||||
},
|
||||
)
|
||||
.await
|
||||
.map_err(|err| {
|
||||
FunctionCallError::RespondToModel(format!("unified exec failed: {err:?}"))
|
||||
|
||||
@@ -1,12 +1,15 @@
|
||||
pub mod context;
|
||||
pub mod events;
|
||||
pub(crate) mod handlers;
|
||||
pub mod orchestrator;
|
||||
pub mod parallel;
|
||||
pub mod registry;
|
||||
pub mod router;
|
||||
pub mod runtimes;
|
||||
pub mod sandboxing;
|
||||
pub mod spec;
|
||||
|
||||
use crate::apply_patch;
|
||||
use crate::apply_patch::ApplyPatchExec;
|
||||
use crate::apply_patch::InternalApplyPatchInvocation;
|
||||
use crate::apply_patch::convert_apply_patch_to_protocol;
|
||||
use crate::codex::Session;
|
||||
@@ -15,14 +18,19 @@ use crate::error::CodexErr;
|
||||
use crate::error::SandboxErr;
|
||||
use crate::exec::ExecParams;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::exec::StdoutStream;
|
||||
use crate::executor::ExecutionMode;
|
||||
use crate::executor::errors::ExecError;
|
||||
use crate::executor::linkers::PreparedExec;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::tools::context::ApplyPatchCommandContext;
|
||||
use crate::tools::context::ExecCommandContext;
|
||||
use crate::tools::context::SharedTurnDiffTracker;
|
||||
use crate::tools::events::ToolEmitter;
|
||||
use crate::tools::events::ToolEventCtx;
|
||||
use crate::tools::events::ToolEventFailure;
|
||||
use crate::tools::events::ToolEventStage;
|
||||
use crate::tools::orchestrator::ToolOrchestrator;
|
||||
use crate::tools::runtimes::apply_patch::ApplyPatchRequest;
|
||||
use crate::tools::runtimes::apply_patch::ApplyPatchRuntime;
|
||||
use crate::tools::runtimes::shell::ShellRequest;
|
||||
use crate::tools::runtimes::shell::ShellRuntime;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
use crate::tools::sandboxing::ToolError;
|
||||
use codex_apply_patch::MaybeApplyPatchVerified;
|
||||
use codex_apply_patch::maybe_parse_apply_patch_verified;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
@@ -56,7 +64,7 @@ pub(crate) async fn handle_container_exec_with_params(
|
||||
sub_id: String,
|
||||
call_id: String,
|
||||
) -> Result<String, FunctionCallError> {
|
||||
let otel_event_manager = turn_context.client.get_otel_event_manager();
|
||||
let _otel_event_manager = turn_context.client.get_otel_event_manager();
|
||||
|
||||
if params.with_escalated_permissions.unwrap_or(false)
|
||||
&& !matches!(turn_context.approval_policy, AskForApproval::OnRequest)
|
||||
@@ -100,86 +108,142 @@ pub(crate) async fn handle_container_exec_with_params(
|
||||
MaybeApplyPatchVerified::NotApplyPatch => None,
|
||||
};
|
||||
|
||||
let command_for_display = if let Some(exec) = apply_patch_exec.as_ref() {
|
||||
vec!["apply_patch".to_string(), exec.action.patch.clone()]
|
||||
} else {
|
||||
params.command.clone()
|
||||
};
|
||||
|
||||
let exec_command_context = ExecCommandContext {
|
||||
sub_id: sub_id.clone(),
|
||||
call_id: call_id.clone(),
|
||||
command_for_display: command_for_display.clone(),
|
||||
cwd: params.cwd.clone(),
|
||||
apply_patch: apply_patch_exec.as_ref().map(
|
||||
|ApplyPatchExec {
|
||||
action,
|
||||
user_explicitly_approved_this_action,
|
||||
}| ApplyPatchCommandContext {
|
||||
user_explicitly_approved_this_action: *user_explicitly_approved_this_action,
|
||||
changes: convert_apply_patch_to_protocol(action),
|
||||
},
|
||||
let (event_emitter, diff_opt) = match apply_patch_exec.as_ref() {
|
||||
Some(exec) => (
|
||||
ToolEmitter::apply_patch(
|
||||
convert_apply_patch_to_protocol(&exec.action),
|
||||
!exec.user_explicitly_approved_this_action,
|
||||
),
|
||||
Some(&turn_diff_tracker),
|
||||
),
|
||||
None => (
|
||||
ToolEmitter::shell(params.command.clone(), params.cwd.clone()),
|
||||
None,
|
||||
),
|
||||
tool_name: tool_name.to_string(),
|
||||
otel_event_manager,
|
||||
};
|
||||
|
||||
let mode = match apply_patch_exec {
|
||||
Some(exec) => ExecutionMode::ApplyPatch(exec),
|
||||
None => ExecutionMode::Shell,
|
||||
};
|
||||
let event_ctx = ToolEventCtx::new(sess.as_ref(), &sub_id, &call_id, diff_opt);
|
||||
event_emitter.emit(event_ctx, ToolEventStage::Begin).await;
|
||||
|
||||
sess.services.executor.update_environment(
|
||||
turn_context.sandbox_policy.clone(),
|
||||
turn_context.cwd.clone(),
|
||||
);
|
||||
// Build runtime contexts only when needed (shell/apply_patch below).
|
||||
|
||||
let prepared_exec = PreparedExec::new(
|
||||
exec_command_context,
|
||||
params,
|
||||
command_for_display,
|
||||
mode,
|
||||
Some(StdoutStream {
|
||||
if let Some(exec) = apply_patch_exec {
|
||||
// Route apply_patch execution through the new orchestrator/runtime.
|
||||
let req = ApplyPatchRequest {
|
||||
patch: exec.action.patch.clone(),
|
||||
cwd: params.cwd.clone(),
|
||||
timeout_ms: params.timeout_ms,
|
||||
user_explicitly_approved: exec.user_explicitly_approved_this_action,
|
||||
codex_exe: turn_context.codex_linux_sandbox_exe.clone(),
|
||||
};
|
||||
|
||||
let mut orchestrator = ToolOrchestrator::new();
|
||||
let mut runtime = ApplyPatchRuntime::new();
|
||||
let tool_ctx = ToolCtx {
|
||||
session: sess.as_ref(),
|
||||
sub_id: sub_id.clone(),
|
||||
call_id: call_id.clone(),
|
||||
tx_event: sess.get_tx_event(),
|
||||
}),
|
||||
turn_context.shell_environment_policy.use_profile,
|
||||
);
|
||||
tool_name: tool_name.to_string(),
|
||||
};
|
||||
|
||||
let output_result = sess
|
||||
.run_exec_with_events(
|
||||
turn_diff_tracker.clone(),
|
||||
prepared_exec,
|
||||
turn_context.approval_policy,
|
||||
)
|
||||
.await;
|
||||
let out = orchestrator
|
||||
.run(
|
||||
&mut runtime,
|
||||
&req,
|
||||
&tool_ctx,
|
||||
&turn_context,
|
||||
turn_context.approval_policy,
|
||||
)
|
||||
.await;
|
||||
|
||||
// always make sure to truncate the output if its length isn't controlled.
|
||||
match output_result {
|
||||
handle_exec_outcome(&event_emitter, event_ctx, out).await
|
||||
} else {
|
||||
// Route shell execution through the new orchestrator/runtime.
|
||||
let req = ShellRequest {
|
||||
command: params.command.clone(),
|
||||
cwd: params.cwd.clone(),
|
||||
timeout_ms: params.timeout_ms,
|
||||
env: params.env.clone(),
|
||||
with_escalated_permissions: params.with_escalated_permissions,
|
||||
justification: params.justification.clone(),
|
||||
};
|
||||
|
||||
let mut orchestrator = ToolOrchestrator::new();
|
||||
let mut runtime = ShellRuntime::new();
|
||||
let tool_ctx = ToolCtx {
|
||||
session: sess.as_ref(),
|
||||
sub_id: sub_id.clone(),
|
||||
call_id: call_id.clone(),
|
||||
tool_name: tool_name.to_string(),
|
||||
};
|
||||
|
||||
let out = orchestrator
|
||||
.run(
|
||||
&mut runtime,
|
||||
&req,
|
||||
&tool_ctx,
|
||||
&turn_context,
|
||||
turn_context.approval_policy,
|
||||
)
|
||||
.await;
|
||||
|
||||
handle_exec_outcome(&event_emitter, event_ctx, out).await
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_exec_outcome(
|
||||
event_emitter: &ToolEmitter,
|
||||
event_ctx: ToolEventCtx<'_>,
|
||||
out: Result<ExecToolCallOutput, ToolError>,
|
||||
) -> Result<String, FunctionCallError> {
|
||||
let event;
|
||||
let result = match out {
|
||||
Ok(output) => {
|
||||
let ExecToolCallOutput { exit_code, .. } = &output;
|
||||
let content = format_exec_output_apply_patch(&output);
|
||||
if *exit_code == 0 {
|
||||
let content = format_exec_output_for_model(&output);
|
||||
let exit_code = output.exit_code;
|
||||
event = ToolEventStage::Success(output);
|
||||
if exit_code == 0 {
|
||||
Ok(content)
|
||||
} else {
|
||||
Err(FunctionCallError::RespondToModel(content))
|
||||
}
|
||||
}
|
||||
Err(ExecError::Function(err)) => Err(truncate_function_error(err)),
|
||||
Err(ExecError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output }))) => Err(
|
||||
FunctionCallError::RespondToModel(format_exec_output_apply_patch(&output)),
|
||||
),
|
||||
Err(ExecError::Codex(err)) => {
|
||||
Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output })))
|
||||
| Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied { output }))) => {
|
||||
let response = format_exec_output_for_model(&output);
|
||||
event = ToolEventStage::Failure(ToolEventFailure::Output(*output));
|
||||
Err(FunctionCallError::RespondToModel(response))
|
||||
}
|
||||
Err(ToolError::Codex(err)) => {
|
||||
let message = format!("execution error: {err:?}");
|
||||
let response = format_exec_output(&message);
|
||||
event = ToolEventStage::Failure(ToolEventFailure::Message(message));
|
||||
Err(FunctionCallError::RespondToModel(format_exec_output(
|
||||
&message,
|
||||
&response,
|
||||
)))
|
||||
}
|
||||
}
|
||||
Err(ToolError::Rejected(msg)) | Err(ToolError::SandboxDenied(msg)) => {
|
||||
// Normalize common rejection messages for exec tools so tests and
|
||||
// users see a clear, consistent phrase.
|
||||
let normalized = if msg == "rejected by user" {
|
||||
"exec command rejected by user".to_string()
|
||||
} else {
|
||||
msg
|
||||
};
|
||||
let response = format_exec_output(&normalized);
|
||||
event = ToolEventStage::Failure(ToolEventFailure::Message(normalized));
|
||||
Err(FunctionCallError::RespondToModel(format_exec_output(
|
||||
&response,
|
||||
)))
|
||||
}
|
||||
};
|
||||
event_emitter.emit(event_ctx, event).await;
|
||||
result
|
||||
}
|
||||
|
||||
pub fn format_exec_output_apply_patch(exec_output: &ExecToolCallOutput) -> String {
|
||||
/// Format the combined exec output for sending back to the model.
|
||||
/// Includes exit code and duration metadata; truncates large bodies safely.
|
||||
pub fn format_exec_output_for_model(exec_output: &ExecToolCallOutput) -> String {
|
||||
let ExecToolCallOutput {
|
||||
exit_code,
|
||||
duration,
|
||||
@@ -233,18 +297,7 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
|
||||
format_exec_output(content)
|
||||
}
|
||||
|
||||
fn truncate_function_error(err: FunctionCallError) -> FunctionCallError {
|
||||
match err {
|
||||
FunctionCallError::RespondToModel(msg) => {
|
||||
FunctionCallError::RespondToModel(format_exec_output(&msg))
|
||||
}
|
||||
FunctionCallError::Denied(msg) => FunctionCallError::Denied(format_exec_output(&msg)),
|
||||
FunctionCallError::Fatal(msg) => FunctionCallError::Fatal(format_exec_output(&msg)),
|
||||
other => other,
|
||||
}
|
||||
}
|
||||
|
||||
fn format_exec_output(content: &str) -> String {
|
||||
pub(super) fn format_exec_output(content: &str) -> String {
|
||||
// Head+tail truncation for the model: show the beginning and end with an elision.
|
||||
// Clients still receive full streams; only this formatted summary is capped.
|
||||
let total_lines = content.lines().count();
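Note: the hunk above is cut off before the truncation body. As a hedged sketch only, head+tail truncation of exec output looks roughly like the following; MAX_LINES and the elision message are illustrative placeholders, not the crate's real constants.

// Keep the first and last N/2 lines and note how many were elided in between.
const MAX_LINES: usize = 64;

fn truncate_head_tail(content: &str) -> String {
    let lines: Vec<&str> = content.lines().collect();
    if lines.len() <= MAX_LINES {
        return content.to_string();
    }
    let keep = MAX_LINES / 2;
    let omitted = lines.len() - 2 * keep;
    let mut out = lines[..keep].join("\n");
    out.push_str(&format!("\n[... omitted {omitted} of {} lines ...]\n", lines.len()));
    out.push_str(&lines[lines.len() - keep..].join("\n"));
    out
}

fn main() {
    let big = (0..200).map(|i| format!("line {i}")).collect::<Vec<_>>().join("\n");
    println!("{}", truncate_head_tail(&big));
}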
|
||||
@@ -315,6 +368,17 @@ mod tests {
|
||||
use super::*;
|
||||
use regex_lite::Regex;
|
||||
|
||||
fn truncate_function_error(err: FunctionCallError) -> FunctionCallError {
|
||||
match err {
|
||||
FunctionCallError::RespondToModel(msg) => {
|
||||
FunctionCallError::RespondToModel(format_exec_output(&msg))
|
||||
}
|
||||
FunctionCallError::Denied(msg) => FunctionCallError::Denied(format_exec_output(&msg)),
|
||||
FunctionCallError::Fatal(msg) => FunctionCallError::Fatal(format_exec_output(&msg)),
|
||||
other => other,
|
||||
}
|
||||
}
|
||||
|
||||
fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usize) {
|
||||
let pattern = truncated_message_pattern(line, total_lines);
|
||||
let regex = Regex::new(&pattern).unwrap_or_else(|err| {
172 codex-rs/core/src/tools/orchestrator.rs Normal file
@@ -0,0 +1,172 @@
/*
|
||||
Module: orchestrator
|
||||
|
||||
Central place for approvals + sandbox selection + retry semantics. Drives a
|
||||
simple sequence for any ToolRuntime: approval → select sandbox → attempt →
|
||||
retry without sandbox on denial (no re‑approval thanks to caching).
|
||||
*/
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::SandboxErr;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::sandboxing::SandboxManager;
|
||||
use crate::tools::sandboxing::ApprovalCtx;
|
||||
use crate::tools::sandboxing::SandboxAttempt;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
use crate::tools::sandboxing::ToolError;
|
||||
use crate::tools::sandboxing::ToolRuntime;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
|
||||
pub(crate) struct ToolOrchestrator {
|
||||
sandbox: SandboxManager,
|
||||
}
|
||||
|
||||
impl ToolOrchestrator {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
sandbox: SandboxManager::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn run<Rq, Out, T>(
|
||||
&mut self,
|
||||
tool: &mut T,
|
||||
req: &Rq,
|
||||
tool_ctx: &ToolCtx<'_>,
|
||||
turn_ctx: &crate::codex::TurnContext,
|
||||
approval_policy: AskForApproval,
|
||||
) -> Result<Out, ToolError>
|
||||
where
|
||||
T: ToolRuntime<Rq, Out>,
|
||||
{
|
||||
let otel = turn_ctx.client.get_otel_event_manager();
|
||||
let otel_tn = &tool_ctx.tool_name;
|
||||
let otel_ci = &tool_ctx.call_id;
|
||||
let otel_user = codex_otel::otel_event_manager::ToolDecisionSource::User;
|
||||
let otel_cfg = codex_otel::otel_event_manager::ToolDecisionSource::Config;
|
||||
|
||||
// 1) Approval
|
||||
let needs_initial_approval =
|
||||
tool.wants_initial_approval(req, approval_policy, &turn_ctx.sandbox_policy);
|
||||
let mut already_approved = false;
|
||||
|
||||
if needs_initial_approval {
|
||||
let approval_ctx = ApprovalCtx {
|
||||
session: tool_ctx.session,
|
||||
sub_id: &tool_ctx.sub_id,
|
||||
call_id: &tool_ctx.call_id,
|
||||
retry_reason: None,
|
||||
};
|
||||
let decision = tool.start_approval_async(req, approval_ctx).await;
|
||||
|
||||
otel.tool_decision(otel_tn, otel_ci, decision, otel_user.clone());
|
||||
|
||||
match decision {
|
||||
ReviewDecision::Denied | ReviewDecision::Abort => {
|
||||
return Err(ToolError::Rejected("rejected by user".to_string()));
|
||||
}
|
||||
ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {}
|
||||
}
|
||||
already_approved = true;
|
||||
} else {
|
||||
otel.tool_decision(otel_tn, otel_ci, ReviewDecision::Approved, otel_cfg);
|
||||
}
|
||||
|
||||
// 2) First attempt under the selected sandbox.
|
||||
let mut initial_sandbox = self
|
||||
.sandbox
|
||||
.select_initial(&turn_ctx.sandbox_policy, tool.sandbox_preference());
|
||||
if tool.wants_escalated_first_attempt(req) {
|
||||
initial_sandbox = crate::exec::SandboxType::None;
|
||||
}
|
||||
let initial_attempt = SandboxAttempt {
|
||||
sandbox: initial_sandbox,
|
||||
policy: &turn_ctx.sandbox_policy,
|
||||
manager: &self.sandbox,
|
||||
sandbox_cwd: &turn_ctx.cwd,
|
||||
codex_linux_sandbox_exe: turn_ctx.codex_linux_sandbox_exe.as_ref(),
|
||||
};
|
||||
|
||||
match tool.run(req, &initial_attempt, tool_ctx).await {
|
||||
Ok(out) => {
|
||||
// We have a successful initial result
|
||||
Ok(out)
|
||||
}
|
||||
Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied { output }))) => {
|
||||
if !tool.escalate_on_failure() {
|
||||
return Err(ToolError::SandboxDenied(
|
||||
"sandbox denied and no retry".to_string(),
|
||||
));
|
||||
}
|
||||
// Under `Never`, do not retry without sandbox; surface a concise message
|
||||
// derived from the actual output (platform-agnostic).
|
||||
if matches!(approval_policy, AskForApproval::Never) {
|
||||
let msg = build_never_denied_message_from_output(output.as_ref());
|
||||
return Err(ToolError::SandboxDenied(msg));
|
||||
}
|
||||
|
||||
// Ask for approval before retrying without sandbox.
|
||||
if !tool.should_bypass_approval(approval_policy, already_approved) {
|
||||
let reason_msg = build_denial_reason_from_output(output.as_ref());
|
||||
let approval_ctx = ApprovalCtx {
|
||||
session: tool_ctx.session,
|
||||
sub_id: &tool_ctx.sub_id,
|
||||
call_id: &tool_ctx.call_id,
|
||||
retry_reason: Some(reason_msg),
|
||||
};
|
||||
|
||||
let decision = tool.start_approval_async(req, approval_ctx).await;
|
||||
otel.tool_decision(otel_tn, otel_ci, decision, otel_user);
|
||||
|
||||
match decision {
|
||||
ReviewDecision::Denied | ReviewDecision::Abort => {
|
||||
return Err(ToolError::Rejected("rejected by user".to_string()));
|
||||
}
|
||||
ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {}
|
||||
}
|
||||
}
|
||||
|
||||
let escalated_attempt = SandboxAttempt {
|
||||
sandbox: crate::exec::SandboxType::None,
|
||||
policy: &turn_ctx.sandbox_policy,
|
||||
manager: &self.sandbox,
|
||||
sandbox_cwd: &turn_ctx.cwd,
|
||||
codex_linux_sandbox_exe: None,
|
||||
};
|
||||
|
||||
// Second attempt.
|
||||
(*tool).run(req, &escalated_attempt, tool_ctx).await
|
||||
}
|
||||
other => other,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn build_never_denied_message_from_output(output: &ExecToolCallOutput) -> String {
|
||||
let body = format!(
|
||||
"{}\n{}\n{}",
|
||||
output.stderr.text, output.stdout.text, output.aggregated_output.text
|
||||
)
|
||||
.to_lowercase();
|
||||
|
||||
let detail = if body.contains("permission denied") {
|
||||
Some("Permission denied")
|
||||
} else if body.contains("operation not permitted") {
|
||||
Some("Operation not permitted")
|
||||
} else if body.contains("read-only file system") {
|
||||
Some("Read-only file system")
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
match detail {
|
||||
Some(tag) => format!("failed in sandbox: {tag}"),
|
||||
None => "failed in sandbox".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
fn build_denial_reason_from_output(_output: &ExecToolCallOutput) -> String {
|
||||
// Keep approval reason terse and stable for UX/tests, but accept the
|
||||
// output so we can evolve heuristics later without touching call sites.
|
||||
"command failed; retry without sandbox?".to_string()
|
||||
}
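Note: the following is a minimal, self-contained model of the orchestrator's control flow above (approval, sandboxed attempt, optional escalated retry). All types are illustrative stand-ins; the real code threads Session, TurnContext, and otel events through this sequence.

#[derive(Clone, Copy, Debug, PartialEq)]
enum Decision { Approved, Denied }

#[derive(Debug)]
enum Outcome { Ok(String), SandboxDenied, Rejected }

// Pretend the sandbox blocks a write on the first attempt only.
fn run_once(sandboxed: bool) -> Result<String, &'static str> {
    if sandboxed { Err("denied") } else { Ok("wrote file".to_string()) }
}

fn orchestrate(initial_decision: Decision, retry_decision: Decision) -> Outcome {
    if initial_decision == Decision::Denied {
        return Outcome::Rejected;
    }
    match run_once(true) {
        Ok(out) => Outcome::Ok(out),
        Err(_) => {
            // Sandbox denial: ask again (unless cached/bypassed), then retry unsandboxed.
            if retry_decision == Decision::Denied {
                return Outcome::Rejected;
            }
            match run_once(false) {
                Ok(out) => Outcome::Ok(out),
                Err(_) => Outcome::SandboxDenied,
            }
        }
    }
}

fn main() {
    println!("{:?}", orchestrate(Decision::Approved, Decision::Approved));
    println!("{:?}", orchestrate(Decision::Approved, Decision::Denied));
}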
148 codex-rs/core/src/tools/runtimes/apply_patch.rs Normal file
@@ -0,0 +1,148 @@
//! Apply Patch runtime: executes verified patches under the orchestrator.
|
||||
//!
|
||||
//! Assumes `apply_patch` verification/approval happened upstream. Reuses that
|
||||
//! decision to avoid re-prompting, builds the self-invocation command for
|
||||
//! `codex --codex-run-as-apply-patch`, and runs under the current
|
||||
//! `SandboxAttempt` with a minimal environment.
|
||||
use crate::CODEX_APPLY_PATCH_ARG1;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::sandboxing::CommandSpec;
|
||||
use crate::sandboxing::execute_env;
|
||||
use crate::tools::sandboxing::Approvable;
|
||||
use crate::tools::sandboxing::ApprovalCtx;
|
||||
use crate::tools::sandboxing::SandboxAttempt;
|
||||
use crate::tools::sandboxing::Sandboxable;
|
||||
use crate::tools::sandboxing::SandboxablePreference;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
use crate::tools::sandboxing::ToolError;
|
||||
use crate::tools::sandboxing::ToolRuntime;
|
||||
use crate::tools::sandboxing::with_cached_approval;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use futures::future::BoxFuture;
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct ApplyPatchRequest {
|
||||
pub patch: String,
|
||||
pub cwd: PathBuf,
|
||||
pub timeout_ms: Option<u64>,
|
||||
pub user_explicitly_approved: bool,
|
||||
pub codex_exe: Option<PathBuf>,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct ApplyPatchRuntime;
|
||||
|
||||
#[derive(serde::Serialize, Clone, Debug, Eq, PartialEq, Hash)]
|
||||
pub(crate) struct ApprovalKey {
|
||||
patch: String,
|
||||
cwd: PathBuf,
|
||||
}
|
||||
|
||||
impl ApplyPatchRuntime {
|
||||
pub fn new() -> Self {
|
||||
Self
|
||||
}
|
||||
|
||||
fn build_command_spec(req: &ApplyPatchRequest) -> Result<CommandSpec, ToolError> {
|
||||
use std::env;
|
||||
let exe = if let Some(path) = &req.codex_exe {
|
||||
path.clone()
|
||||
} else {
|
||||
env::current_exe()
|
||||
.map_err(|e| ToolError::Rejected(format!("failed to determine codex exe: {e}")))?
|
||||
};
|
||||
let program = exe.to_string_lossy().to_string();
|
||||
Ok(CommandSpec {
|
||||
program,
|
||||
args: vec![CODEX_APPLY_PATCH_ARG1.to_string(), req.patch.clone()],
|
||||
cwd: req.cwd.clone(),
|
||||
timeout_ms: req.timeout_ms,
|
||||
// Run apply_patch with a minimal environment for determinism and to avoid leaks.
|
||||
env: HashMap::new(),
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
})
|
||||
}
|
||||
|
||||
fn stdout_stream(ctx: &ToolCtx<'_>) -> Option<crate::exec::StdoutStream> {
|
||||
Some(crate::exec::StdoutStream {
|
||||
sub_id: ctx.sub_id.clone(),
|
||||
call_id: ctx.call_id.clone(),
|
||||
tx_event: ctx.session.get_tx_event(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Sandboxable for ApplyPatchRuntime {
|
||||
fn sandbox_preference(&self) -> SandboxablePreference {
|
||||
SandboxablePreference::Auto
|
||||
}
|
||||
fn escalate_on_failure(&self) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
impl Approvable<ApplyPatchRequest> for ApplyPatchRuntime {
|
||||
type ApprovalKey = ApprovalKey;
|
||||
|
||||
fn approval_key(&self, req: &ApplyPatchRequest) -> Self::ApprovalKey {
|
||||
ApprovalKey {
|
||||
patch: req.patch.clone(),
|
||||
cwd: req.cwd.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
fn start_approval_async<'a>(
|
||||
&'a mut self,
|
||||
req: &'a ApplyPatchRequest,
|
||||
ctx: ApprovalCtx<'a>,
|
||||
) -> BoxFuture<'a, ReviewDecision> {
|
||||
let key = self.approval_key(req);
|
||||
let session = ctx.session;
|
||||
let sub_id = ctx.sub_id.to_string();
|
||||
let call_id = ctx.call_id.to_string();
|
||||
let cwd = req.cwd.clone();
|
||||
let retry_reason = ctx.retry_reason.clone();
|
||||
let user_explicitly_approved = req.user_explicitly_approved;
|
||||
Box::pin(async move {
|
||||
with_cached_approval(&session.services, key, || async move {
|
||||
if let Some(reason) = retry_reason {
|
||||
session
|
||||
.request_command_approval(
|
||||
sub_id,
|
||||
call_id,
|
||||
vec!["apply_patch".to_string()],
|
||||
cwd,
|
||||
Some(reason),
|
||||
)
|
||||
.await
|
||||
} else if user_explicitly_approved {
|
||||
ReviewDecision::ApprovedForSession
|
||||
} else {
|
||||
ReviewDecision::Approved
|
||||
}
|
||||
})
|
||||
.await
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl ToolRuntime<ApplyPatchRequest, ExecToolCallOutput> for ApplyPatchRuntime {
|
||||
async fn run(
|
||||
&mut self,
|
||||
req: &ApplyPatchRequest,
|
||||
attempt: &SandboxAttempt<'_>,
|
||||
ctx: &ToolCtx<'_>,
|
||||
) -> Result<ExecToolCallOutput, ToolError> {
|
||||
let spec = Self::build_command_spec(req)?;
|
||||
let env = attempt
|
||||
.env_for(&spec)
|
||||
.map_err(|err| ToolError::Codex(err.into()))?;
|
||||
let out = execute_env(&env, attempt.policy, Self::stdout_stream(ctx))
|
||||
.await
|
||||
.map_err(ToolError::Codex)?;
|
||||
Ok(out)
|
||||
}
|
||||
}
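Note: a hedged sketch of the self-invocation command ApplyPatchRuntime builds. The literal value of CODEX_APPLY_PATCH_ARG1 is not shown in this diff; "--codex-run-as-apply-patch" below is an assumption taken from the module doc comment, and the struct is a stand-in for the crate's CommandSpec.

use std::collections::HashMap;
use std::path::PathBuf;

struct CommandLine {
    program: String,
    args: Vec<String>,
    cwd: PathBuf,
    env: HashMap<String, String>,
}

fn apply_patch_command(codex_exe: &str, patch: &str, cwd: PathBuf) -> CommandLine {
    CommandLine {
        program: codex_exe.to_string(),
        // Assumed flag value; the runtime uses the CODEX_APPLY_PATCH_ARG1 constant.
        args: vec!["--codex-run-as-apply-patch".to_string(), patch.to_string()],
        cwd,
        // Minimal environment for determinism, mirroring the runtime above.
        env: HashMap::new(),
    }
}

fn main() {
    let cmd = apply_patch_command("/usr/local/bin/codex", "*** Begin Patch\n*** End Patch", PathBuf::from("/tmp"));
    println!("{} {:?} (cwd: {}, env vars: {})", cmd.program, cmd.args, cmd.cwd.display(), cmd.env.len());
}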
38 codex-rs/core/src/tools/runtimes/mod.rs Normal file
@@ -0,0 +1,38 @@
/*
|
||||
Module: runtimes
|
||||
|
||||
Concrete ToolRuntime implementations for specific tools. Each runtime stays
|
||||
small and focused and reuses the orchestrator for approvals + sandbox + retry.
|
||||
*/
|
||||
use crate::sandboxing::CommandSpec;
|
||||
use crate::tools::sandboxing::ToolError;
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
|
||||
pub mod apply_patch;
|
||||
pub mod shell;
|
||||
pub mod unified_exec;
|
||||
|
||||
/// Shared helper to construct a CommandSpec from a tokenized command line.
|
||||
/// Validates that at least a program is present.
|
||||
pub(crate) fn build_command_spec(
|
||||
command: &[String],
|
||||
cwd: &Path,
|
||||
env: &HashMap<String, String>,
|
||||
timeout_ms: Option<u64>,
|
||||
with_escalated_permissions: Option<bool>,
|
||||
justification: Option<String>,
|
||||
) -> Result<CommandSpec, ToolError> {
|
||||
let (program, args) = command
|
||||
.split_first()
|
||||
.ok_or_else(|| ToolError::Rejected("command args are empty".to_string()))?;
|
||||
Ok(CommandSpec {
|
||||
program: program.clone(),
|
||||
args: args.to_vec(),
|
||||
cwd: cwd.to_path_buf(),
|
||||
env: env.clone(),
|
||||
timeout_ms,
|
||||
with_escalated_permissions,
|
||||
justification,
|
||||
})
|
||||
}
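Note: a small usage sketch of the split-first pattern build_command_spec relies on: the first token is the program, the rest are args, and an empty command is rejected. Standalone version, not the crate's function.

fn split_command(command: &[String]) -> Result<(String, Vec<String>), String> {
    let (program, args) = command
        .split_first()
        .ok_or_else(|| "command args are empty".to_string())?;
    Ok((program.clone(), args.to_vec()))
}

fn main() {
    let cmd = vec!["ls".to_string(), "-la".to_string()];
    println!("{:?}", split_command(&cmd)); // Ok(("ls", ["-la"]))
    println!("{:?}", split_command(&[])); // Err("command args are empty")
}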
164 codex-rs/core/src/tools/runtimes/shell.rs Normal file
@@ -0,0 +1,164 @@
/*
|
||||
Runtime: shell
|
||||
|
||||
Executes shell requests under the orchestrator: asks for approval when needed,
|
||||
builds a CommandSpec, and runs it under the current SandboxAttempt.
|
||||
*/
|
||||
use crate::command_safety::is_dangerous_command::command_might_be_dangerous;
|
||||
use crate::command_safety::is_safe_command::is_known_safe_command;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use crate::sandboxing::execute_env;
|
||||
use crate::tools::runtimes::build_command_spec;
|
||||
use crate::tools::sandboxing::Approvable;
|
||||
use crate::tools::sandboxing::ApprovalCtx;
|
||||
use crate::tools::sandboxing::SandboxAttempt;
|
||||
use crate::tools::sandboxing::Sandboxable;
|
||||
use crate::tools::sandboxing::SandboxablePreference;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
use crate::tools::sandboxing::ToolError;
|
||||
use crate::tools::sandboxing::ToolRuntime;
|
||||
use crate::tools::sandboxing::with_cached_approval;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use futures::future::BoxFuture;
|
||||
use std::path::PathBuf;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct ShellRequest {
|
||||
pub command: Vec<String>,
|
||||
pub cwd: PathBuf,
|
||||
pub timeout_ms: Option<u64>,
|
||||
pub env: std::collections::HashMap<String, String>,
|
||||
pub with_escalated_permissions: Option<bool>,
|
||||
pub justification: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct ShellRuntime;
|
||||
|
||||
#[derive(serde::Serialize, Clone, Debug, Eq, PartialEq, Hash)]
|
||||
pub(crate) struct ApprovalKey {
|
||||
command: Vec<String>,
|
||||
cwd: PathBuf,
|
||||
escalated: bool,
|
||||
}
|
||||
|
||||
impl ShellRuntime {
|
||||
pub fn new() -> Self {
|
||||
Self
|
||||
}
|
||||
|
||||
fn stdout_stream(ctx: &ToolCtx<'_>) -> Option<crate::exec::StdoutStream> {
|
||||
Some(crate::exec::StdoutStream {
|
||||
sub_id: ctx.sub_id.clone(),
|
||||
call_id: ctx.call_id.clone(),
|
||||
tx_event: ctx.session.get_tx_event(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Sandboxable for ShellRuntime {
|
||||
fn sandbox_preference(&self) -> SandboxablePreference {
|
||||
SandboxablePreference::Auto
|
||||
}
|
||||
fn escalate_on_failure(&self) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
impl Approvable<ShellRequest> for ShellRuntime {
|
||||
type ApprovalKey = ApprovalKey;
|
||||
|
||||
fn approval_key(&self, req: &ShellRequest) -> Self::ApprovalKey {
|
||||
ApprovalKey {
|
||||
command: req.command.clone(),
|
||||
cwd: req.cwd.clone(),
|
||||
escalated: req.with_escalated_permissions.unwrap_or(false),
|
||||
}
|
||||
}
|
||||
|
||||
fn start_approval_async<'a>(
|
||||
&'a mut self,
|
||||
req: &'a ShellRequest,
|
||||
ctx: ApprovalCtx<'a>,
|
||||
) -> BoxFuture<'a, ReviewDecision> {
|
||||
let key = self.approval_key(req);
|
||||
let command = req.command.clone();
|
||||
let cwd = req.cwd.clone();
|
||||
let reason = ctx
|
||||
.retry_reason
|
||||
.clone()
|
||||
.or_else(|| req.justification.clone());
|
||||
let session = ctx.session;
|
||||
let sub_id = ctx.sub_id.to_string();
|
||||
let call_id = ctx.call_id.to_string();
|
||||
Box::pin(async move {
|
||||
with_cached_approval(&session.services, key, || async move {
|
||||
session
|
||||
.request_command_approval(sub_id, call_id, command, cwd, reason)
|
||||
.await
|
||||
})
|
||||
.await
|
||||
})
|
||||
}
|
||||
|
||||
fn wants_initial_approval(
|
||||
&self,
|
||||
req: &ShellRequest,
|
||||
policy: AskForApproval,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
) -> bool {
|
||||
if is_known_safe_command(&req.command) {
|
||||
return false;
|
||||
}
|
||||
match policy {
|
||||
AskForApproval::Never | AskForApproval::OnFailure => false,
|
||||
AskForApproval::OnRequest => {
|
||||
// In DangerFullAccess, only prompt if the command looks dangerous.
|
||||
if matches!(sandbox_policy, SandboxPolicy::DangerFullAccess) {
|
||||
return command_might_be_dangerous(&req.command);
|
||||
}
|
||||
|
||||
// In restricted sandboxes (ReadOnly/WorkspaceWrite), do not prompt for
|
||||
// non‑escalated, non‑dangerous commands — let the sandbox enforce
|
||||
// restrictions (e.g., block network/write) without a user prompt.
|
||||
let wants_escalation = req.with_escalated_permissions.unwrap_or(false);
|
||||
if wants_escalation {
|
||||
return true;
|
||||
}
|
||||
command_might_be_dangerous(&req.command)
|
||||
}
|
||||
AskForApproval::UnlessTrusted => !is_known_safe_command(&req.command),
|
||||
}
|
||||
}
|
||||
|
||||
fn wants_escalated_first_attempt(&self, req: &ShellRequest) -> bool {
|
||||
req.with_escalated_permissions.unwrap_or(false)
|
||||
}
|
||||
}
|
||||
|
||||
impl ToolRuntime<ShellRequest, ExecToolCallOutput> for ShellRuntime {
|
||||
async fn run(
|
||||
&mut self,
|
||||
req: &ShellRequest,
|
||||
attempt: &SandboxAttempt<'_>,
|
||||
ctx: &ToolCtx<'_>,
|
||||
) -> Result<ExecToolCallOutput, ToolError> {
|
||||
let spec = build_command_spec(
|
||||
&req.command,
|
||||
&req.cwd,
|
||||
&req.env,
|
||||
req.timeout_ms,
|
||||
req.with_escalated_permissions,
|
||||
req.justification.clone(),
|
||||
)?;
|
||||
let env = attempt
|
||||
.env_for(&spec)
|
||||
.map_err(|err| ToolError::Codex(err.into()))?;
|
||||
let out = execute_env(&env, attempt.policy, Self::stdout_stream(ctx))
|
||||
.await
|
||||
.map_err(ToolError::Codex)?;
|
||||
Ok(out)
|
||||
}
|
||||
}
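Note: a standalone restatement of ShellRuntime::wants_initial_approval's decision table. The boolean parameters stand in for is_known_safe_command and command_might_be_dangerous, which are not reproduced here; enum names are simplified.

#[allow(dead_code)]
#[derive(Clone, Copy)]
enum Approval { Never, OnFailure, OnRequest, UnlessTrusted }

#[allow(dead_code)]
#[derive(Clone, Copy)]
enum Sandbox { ReadOnly, WorkspaceWrite, DangerFullAccess }

fn wants_initial_approval(
    known_safe: bool,
    looks_dangerous: bool,
    wants_escalation: bool,
    policy: Approval,
    sandbox: Sandbox,
) -> bool {
    if known_safe {
        return false;
    }
    match policy {
        Approval::Never | Approval::OnFailure => false,
        Approval::OnRequest => {
            if matches!(sandbox, Sandbox::DangerFullAccess) {
                return looks_dangerous;
            }
            // In restricted sandboxes, only escalation requests or
            // dangerous-looking commands prompt up front.
            wants_escalation || looks_dangerous
        }
        Approval::UnlessTrusted => true,
    }
}

fn main() {
    // Dangerous-looking command under OnRequest + WorkspaceWrite: prompt.
    assert!(wants_initial_approval(false, true, false, Approval::OnRequest, Sandbox::WorkspaceWrite));
    // Known-safe command never prompts.
    assert!(!wants_initial_approval(true, false, false, Approval::UnlessTrusted, Sandbox::ReadOnly));
    println!("decision table checks passed");
}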
123 codex-rs/core/src/tools/runtimes/unified_exec.rs Normal file
@@ -0,0 +1,123 @@
/*
|
||||
Runtime: unified exec
|
||||
|
||||
Handles approval + sandbox orchestration for unified exec requests, delegating to
|
||||
the session manager to spawn PTYs once an ExecEnv is prepared.
|
||||
*/
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::SandboxErr;
|
||||
use crate::tools::runtimes::build_command_spec;
|
||||
use crate::tools::sandboxing::Approvable;
|
||||
use crate::tools::sandboxing::ApprovalCtx;
|
||||
use crate::tools::sandboxing::SandboxAttempt;
|
||||
use crate::tools::sandboxing::Sandboxable;
|
||||
use crate::tools::sandboxing::SandboxablePreference;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
use crate::tools::sandboxing::ToolError;
|
||||
use crate::tools::sandboxing::ToolRuntime;
|
||||
use crate::tools::sandboxing::with_cached_approval;
|
||||
use crate::unified_exec::UnifiedExecError;
|
||||
use crate::unified_exec::UnifiedExecSession;
|
||||
use crate::unified_exec::UnifiedExecSessionManager;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use futures::future::BoxFuture;
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct UnifiedExecRequest {
|
||||
pub command: Vec<String>,
|
||||
pub cwd: PathBuf,
|
||||
pub env: HashMap<String, String>,
|
||||
}
|
||||
|
||||
#[derive(serde::Serialize, Clone, Debug, Eq, PartialEq, Hash)]
|
||||
pub struct UnifiedExecApprovalKey {
|
||||
pub command: Vec<String>,
|
||||
pub cwd: PathBuf,
|
||||
}
|
||||
|
||||
pub struct UnifiedExecRuntime<'a> {
|
||||
manager: &'a UnifiedExecSessionManager,
|
||||
}
|
||||
|
||||
impl UnifiedExecRequest {
|
||||
pub fn new(command: Vec<String>, cwd: PathBuf, env: HashMap<String, String>) -> Self {
|
||||
Self { command, cwd, env }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> UnifiedExecRuntime<'a> {
|
||||
pub fn new(manager: &'a UnifiedExecSessionManager) -> Self {
|
||||
Self { manager }
|
||||
}
|
||||
}
|
||||
|
||||
impl Sandboxable for UnifiedExecRuntime<'_> {
|
||||
fn sandbox_preference(&self) -> SandboxablePreference {
|
||||
SandboxablePreference::Auto
|
||||
}
|
||||
|
||||
fn escalate_on_failure(&self) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
|
||||
type ApprovalKey = UnifiedExecApprovalKey;
|
||||
|
||||
fn approval_key(&self, req: &UnifiedExecRequest) -> Self::ApprovalKey {
|
||||
UnifiedExecApprovalKey {
|
||||
command: req.command.clone(),
|
||||
cwd: req.cwd.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
fn start_approval_async<'b>(
|
||||
&'b mut self,
|
||||
req: &'b UnifiedExecRequest,
|
||||
ctx: ApprovalCtx<'b>,
|
||||
) -> BoxFuture<'b, ReviewDecision> {
|
||||
let key = self.approval_key(req);
|
||||
let session = ctx.session;
|
||||
let sub_id = ctx.sub_id.to_string();
|
||||
let call_id = ctx.call_id.to_string();
|
||||
let command = req.command.clone();
|
||||
let cwd = req.cwd.clone();
|
||||
let reason = ctx.retry_reason.clone();
|
||||
Box::pin(async move {
|
||||
with_cached_approval(&session.services, key, || async move {
|
||||
session
|
||||
.request_command_approval(sub_id, call_id, command, cwd, reason)
|
||||
.await
|
||||
})
|
||||
.await
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ToolRuntime<UnifiedExecRequest, UnifiedExecSession> for UnifiedExecRuntime<'a> {
|
||||
async fn run(
|
||||
&mut self,
|
||||
req: &UnifiedExecRequest,
|
||||
attempt: &SandboxAttempt<'_>,
|
||||
_ctx: &ToolCtx<'_>,
|
||||
) -> Result<UnifiedExecSession, ToolError> {
|
||||
let spec = build_command_spec(&req.command, &req.cwd, &req.env, None, None, None)
|
||||
.map_err(|_| ToolError::Rejected("missing command line for PTY".to_string()))?;
|
||||
let exec_env = attempt
|
||||
.env_for(&spec)
|
||||
.map_err(|err| ToolError::Codex(err.into()))?;
|
||||
self.manager
|
||||
.open_session_with_exec_env(&exec_env)
|
||||
.await
|
||||
.map_err(|err| match err {
|
||||
UnifiedExecError::SandboxDenied { output, .. } => {
|
||||
ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied {
|
||||
output: Box::new(output),
|
||||
}))
|
||||
}
|
||||
other => ToolError::Rejected(other.to_string()),
|
||||
})
|
||||
}
|
||||
}
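Note: a simplified stand-in for the error mapping at the bottom of UnifiedExecRuntime::run. Sandbox denials keep their structured output so the orchestrator can retry without sandbox; anything else surfaces as a plain rejection. Types here are illustrative, not the crate's.

#[derive(Debug)]
enum PtyError { SandboxDenied { output: String }, Other(String) }

#[derive(Debug)]
enum ToolFailure { SandboxDenied { output: String }, Rejected(String) }

fn map_pty_error(err: PtyError) -> ToolFailure {
    match err {
        PtyError::SandboxDenied { output } => ToolFailure::SandboxDenied { output },
        other => ToolFailure::Rejected(format!("{other:?}")),
    }
}

fn main() {
    println!("{:?}", map_pty_error(PtyError::SandboxDenied { output: "Permission denied".into() }));
    println!("{:?}", map_pty_error(PtyError::Other("pty spawn failed".into())));
}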
190 codex-rs/core/src/tools/sandboxing.rs Normal file
@@ -0,0 +1,190 @@
//! Shared approvals and sandboxing traits used by tool runtimes.
|
||||
//!
|
||||
//! Consolidates the approval flow primitives (`ApprovalDecision`, `ApprovalStore`,
|
||||
//! `ApprovalCtx`, `Approvable`) together with the sandbox orchestration traits
|
||||
//! and helpers (`Sandboxable`, `ToolRuntime`, `SandboxAttempt`, etc.).
|
||||
|
||||
use crate::codex::Session;
|
||||
use crate::error::CodexErr;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use crate::sandboxing::CommandSpec;
|
||||
use crate::sandboxing::SandboxManager;
|
||||
use crate::sandboxing::SandboxTransformError;
|
||||
use crate::state::SessionServices;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::Debug;
|
||||
use std::hash::Hash;
|
||||
use std::path::Path;
|
||||
|
||||
use futures::Future;
|
||||
use futures::future::BoxFuture;
|
||||
use serde::Serialize;
|
||||
|
||||
#[derive(Clone, Default, Debug)]
|
||||
pub(crate) struct ApprovalStore {
|
||||
// Store serialized keys for generic caching across requests.
|
||||
map: HashMap<String, ReviewDecision>,
|
||||
}
|
||||
|
||||
impl ApprovalStore {
|
||||
pub fn get<K>(&self, key: &K) -> Option<ReviewDecision>
|
||||
where
|
||||
K: Serialize,
|
||||
{
|
||||
let s = serde_json::to_string(key).ok()?;
|
||||
self.map.get(&s).cloned()
|
||||
}
|
||||
|
||||
pub fn put<K>(&mut self, key: K, value: ReviewDecision)
|
||||
where
|
||||
K: Serialize,
|
||||
{
|
||||
if let Ok(s) = serde_json::to_string(&key) {
|
||||
self.map.insert(s, value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn with_cached_approval<K, F, Fut>(
|
||||
services: &SessionServices,
|
||||
key: K,
|
||||
fetch: F,
|
||||
) -> ReviewDecision
|
||||
where
|
||||
K: Serialize + Clone,
|
||||
F: FnOnce() -> Fut,
|
||||
Fut: Future<Output = ReviewDecision>,
|
||||
{
|
||||
{
|
||||
let store = services.tool_approvals.lock().await;
|
||||
if let Some(decision) = store.get(&key) {
|
||||
return decision;
|
||||
}
|
||||
}
|
||||
|
||||
let decision = fetch().await;
|
||||
|
||||
if matches!(decision, ReviewDecision::ApprovedForSession) {
|
||||
let mut store = services.tool_approvals.lock().await;
|
||||
store.put(key, ReviewDecision::ApprovedForSession);
|
||||
}
|
||||
|
||||
decision
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct ApprovalCtx<'a> {
|
||||
pub session: &'a Session,
|
||||
pub sub_id: &'a str,
|
||||
pub call_id: &'a str,
|
||||
pub retry_reason: Option<String>,
|
||||
}
|
||||
|
||||
pub(crate) trait Approvable<Req> {
|
||||
type ApprovalKey: Hash + Eq + Clone + Debug + Serialize;
|
||||
|
||||
fn approval_key(&self, req: &Req) -> Self::ApprovalKey;
|
||||
|
||||
/// Some tools may request to skip the sandbox on the first attempt
|
||||
/// (e.g., when the request explicitly asks for escalated permissions).
|
||||
/// Defaults to `false`.
|
||||
fn wants_escalated_first_attempt(&self, _req: &Req) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn should_bypass_approval(&self, policy: AskForApproval, already_approved: bool) -> bool {
|
||||
if already_approved {
|
||||
// Already approved earlier in this call; do not prompt again.
|
||||
return true;
|
||||
}
|
||||
matches!(policy, AskForApproval::Never)
|
||||
}
|
||||
|
||||
/// Decide whether an initial user approval should be requested before the
|
||||
/// first attempt. Defaults to the orchestrator's behavior (pre‑refactor):
|
||||
/// - Never, OnFailure: do not ask
|
||||
/// - OnRequest: ask unless sandbox policy is DangerFullAccess
|
||||
/// - UnlessTrusted: always ask
|
||||
fn wants_initial_approval(
|
||||
&self,
|
||||
_req: &Req,
|
||||
policy: AskForApproval,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
) -> bool {
|
||||
match policy {
|
||||
AskForApproval::Never | AskForApproval::OnFailure => false,
|
||||
AskForApproval::OnRequest => !matches!(sandbox_policy, SandboxPolicy::DangerFullAccess),
|
||||
AskForApproval::UnlessTrusted => true,
|
||||
}
|
||||
}
|
||||
|
||||
fn start_approval_async<'a>(
|
||||
&'a mut self,
|
||||
req: &'a Req,
|
||||
ctx: ApprovalCtx<'a>,
|
||||
) -> BoxFuture<'a, ReviewDecision>;
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub(crate) enum SandboxablePreference {
|
||||
Auto,
|
||||
#[allow(dead_code)] // Will be used by later tools.
|
||||
Require,
|
||||
#[allow(dead_code)] // Will be used by later tools.
|
||||
Forbid,
|
||||
}
|
||||
|
||||
pub(crate) trait Sandboxable {
|
||||
fn sandbox_preference(&self) -> SandboxablePreference;
|
||||
fn escalate_on_failure(&self) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct ToolCtx<'a> {
|
||||
pub session: &'a Session,
|
||||
pub sub_id: String,
|
||||
pub call_id: String,
|
||||
pub tool_name: String,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) enum ToolError {
|
||||
Rejected(String),
|
||||
SandboxDenied(String),
|
||||
Codex(CodexErr),
|
||||
}
|
||||
|
||||
pub(crate) trait ToolRuntime<Req, Out>: Approvable<Req> + Sandboxable {
|
||||
async fn run(
|
||||
&mut self,
|
||||
req: &Req,
|
||||
attempt: &SandboxAttempt<'_>,
|
||||
ctx: &ToolCtx,
|
||||
) -> Result<Out, ToolError>;
|
||||
}
|
||||
|
||||
pub(crate) struct SandboxAttempt<'a> {
|
||||
pub sandbox: crate::exec::SandboxType,
|
||||
pub policy: &'a crate::protocol::SandboxPolicy,
|
||||
pub(crate) manager: &'a SandboxManager,
|
||||
pub(crate) sandbox_cwd: &'a Path,
|
||||
pub codex_linux_sandbox_exe: Option<&'a std::path::PathBuf>,
|
||||
}
|
||||
|
||||
impl<'a> SandboxAttempt<'a> {
|
||||
pub fn env_for(
|
||||
&self,
|
||||
spec: &CommandSpec,
|
||||
) -> Result<crate::sandboxing::ExecEnv, SandboxTransformError> {
|
||||
self.manager.transform(
|
||||
spec,
|
||||
self.policy,
|
||||
self.sandbox,
|
||||
self.sandbox_cwd,
|
||||
self.codex_linux_sandbox_exe,
|
||||
)
|
||||
}
|
||||
}
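Note: a minimal, synchronous sketch of the approval caching implemented by ApprovalStore and with_cached_approval above: keys are serialized requests, and only ApprovedForSession decisions are persisted for reuse. The types are simplified stand-ins without the async locking.

use std::collections::HashMap;

#[derive(Clone, Copy, Debug, PartialEq)]
enum Decision { Approved, ApprovedForSession, Denied }

#[derive(Default)]
struct ApprovalCache { map: HashMap<String, Decision> }

impl ApprovalCache {
    fn check_or_ask(&mut self, key: &str, ask: impl FnOnce() -> Decision) -> Decision {
        if let Some(d) = self.map.get(key) {
            return *d;
        }
        let decision = ask();
        if decision == Decision::ApprovedForSession {
            self.map.insert(key.to_string(), decision);
        }
        decision
    }
}

fn main() {
    let mut cache = ApprovalCache::default();
    let key = r#"{"command":["cargo","test"],"cwd":"/repo","escalated":false}"#;
    // First call prompts (simulated here) and caches the session-wide approval.
    let first = cache.check_or_ask(key, || Decision::ApprovedForSession);
    // Second call is served from the cache without prompting.
    let second = cache.check_or_ask(key, || panic!("should not prompt again"));
    assert_eq!(first, second);
    println!("cached decision reused: {second:?}");
}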
|
||||
@@ -815,12 +815,7 @@ pub(crate) fn build_specs(
|
||||
config: &ToolsConfig,
|
||||
mcp_tools: Option<HashMap<String, mcp_types::Tool>>,
|
||||
) -> ToolRegistryBuilder {
|
||||
use crate::exec_command::EXEC_COMMAND_TOOL_NAME;
|
||||
use crate::exec_command::WRITE_STDIN_TOOL_NAME;
|
||||
use crate::exec_command::create_exec_command_tool_for_responses_api;
|
||||
use crate::exec_command::create_write_stdin_tool_for_responses_api;
|
||||
use crate::tools::handlers::ApplyPatchHandler;
|
||||
use crate::tools::handlers::ExecStreamHandler;
|
||||
use crate::tools::handlers::GrepFilesHandler;
|
||||
use crate::tools::handlers::ListDirHandler;
|
||||
use crate::tools::handlers::McpHandler;
|
||||
@@ -836,7 +831,6 @@ pub(crate) fn build_specs(
|
||||
let mut builder = ToolRegistryBuilder::new();
|
||||
|
||||
let shell_handler = Arc::new(ShellHandler);
|
||||
let exec_stream_handler = Arc::new(ExecStreamHandler);
|
||||
let unified_exec_handler = Arc::new(UnifiedExecHandler);
|
||||
let plan_handler = Arc::new(PlanHandler);
|
||||
let apply_patch_handler = Arc::new(ApplyPatchHandler);
|
||||
@@ -844,7 +838,10 @@ pub(crate) fn build_specs(
|
||||
let mcp_handler = Arc::new(McpHandler);
|
||||
let mcp_resource_handler = Arc::new(McpResourceHandler);
|
||||
|
||||
if config.experimental_unified_exec_tool {
|
||||
let use_unified_exec = config.experimental_unified_exec_tool
|
||||
|| matches!(config.shell_type, ConfigShellToolType::Streamable);
|
||||
|
||||
if use_unified_exec {
|
||||
builder.push_spec(create_unified_exec_tool());
|
||||
builder.register_handler("unified_exec", unified_exec_handler);
|
||||
} else {
|
||||
@@ -856,14 +853,7 @@ pub(crate) fn build_specs(
|
||||
builder.push_spec(ToolSpec::LocalShell {});
|
||||
}
|
||||
ConfigShellToolType::Streamable => {
|
||||
builder.push_spec(ToolSpec::Function(
|
||||
create_exec_command_tool_for_responses_api(),
|
||||
));
|
||||
builder.push_spec(ToolSpec::Function(
|
||||
create_write_stdin_tool_for_responses_api(),
|
||||
));
|
||||
builder.register_handler(EXEC_COMMAND_TOOL_NAME, exec_stream_handler.clone());
|
||||
builder.register_handler(WRITE_STDIN_TOOL_NAME, exec_stream_handler);
|
||||
// Already handled by use_unified_exec.
|
||||
}
|
||||
}
|
||||
}
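Note: a sketch of the updated tool-selection predicate in build_specs: the unified exec tool is registered either when the experimental flag is set or when the configured shell type is Streamable. The enum and field names below are illustrative stand-ins for ToolsConfig/ConfigShellToolType.

#[allow(dead_code)]
#[derive(Clone, Copy, PartialEq)]
enum ShellToolType { Default, LocalShell, Streamable }

fn use_unified_exec(experimental_unified_exec_tool: bool, shell_type: ShellToolType) -> bool {
    experimental_unified_exec_tool || shell_type == ShellToolType::Streamable
}

fn main() {
    assert!(use_unified_exec(false, ShellToolType::Streamable));
    assert!(use_unified_exec(true, ShellToolType::Default));
    assert!(!use_unified_exec(false, ShellToolType::LocalShell));
    println!("selection checks passed");
}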
|
||||
|
||||
@@ -1,22 +1,29 @@
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use thiserror::Error;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub(crate) enum UnifiedExecError {
|
||||
#[error("Failed to create unified exec session: {pty_error}")]
|
||||
CreateSession {
|
||||
#[source]
|
||||
pty_error: anyhow::Error,
|
||||
},
|
||||
#[error("Failed to create unified exec session: {message}")]
|
||||
CreateSession { message: String },
|
||||
#[error("Unknown session id {session_id}")]
|
||||
UnknownSessionId { session_id: i32 },
|
||||
#[error("failed to write to stdin")]
|
||||
WriteToStdin,
|
||||
#[error("missing command line for unified exec request")]
|
||||
MissingCommandLine,
|
||||
#[error("Command denied by sandbox: {message}")]
|
||||
SandboxDenied {
|
||||
message: String,
|
||||
output: ExecToolCallOutput,
|
||||
},
|
||||
}
|
||||
|
||||
impl UnifiedExecError {
|
||||
pub(crate) fn create_session(error: anyhow::Error) -> Self {
|
||||
Self::CreateSession { pty_error: error }
|
||||
pub(crate) fn create_session(message: String) -> Self {
|
||||
Self::CreateSession { message }
|
||||
}
|
||||
|
||||
pub(crate) fn sandbox_denied(message: String, output: ExecToolCallOutput) -> Self {
|
||||
Self::SandboxDenied { message, output }
|
||||
}
|
||||
}
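Note: a std-only sketch mirroring the reworked UnifiedExecError above: CreateSession now carries a plain message instead of an anyhow source, and SandboxDenied keeps the captured output so callers can surface or retry it. A String stands in for ExecToolCallOutput, and the thiserror derive is replaced by a hand-written Display.

use std::fmt;

#[allow(dead_code)]
#[derive(Debug)]
enum UnifiedExecErrorSketch {
    CreateSession { message: String },
    UnknownSessionId { session_id: i32 },
    SandboxDenied { message: String, output: String },
}

impl fmt::Display for UnifiedExecErrorSketch {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::CreateSession { message } => write!(f, "Failed to create unified exec session: {message}"),
            Self::UnknownSessionId { session_id } => write!(f, "Unknown session id {session_id}"),
            Self::SandboxDenied { message, .. } => write!(f, "Command denied by sandbox: {message}"),
        }
    }
}

fn main() {
    let err = UnifiedExecErrorSketch::SandboxDenied {
        message: "write blocked".into(),
        output: "Permission denied".into(),
    };
    println!("{err}");
}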
|
||||
|
||||
@@ -1,36 +1,55 @@
|
||||
use portable_pty::CommandBuilder;
|
||||
use portable_pty::PtySize;
|
||||
use portable_pty::native_pty_system;
|
||||
use std::collections::HashMap;
|
||||
use std::collections::VecDeque;
|
||||
use std::io::ErrorKind;
|
||||
use std::io::Read;
|
||||
use std::sync::Arc;
|
||||
use std::sync::Mutex as StdMutex;
|
||||
use std::sync::atomic::AtomicBool;
|
||||
use std::sync::atomic::AtomicI32;
|
||||
use std::sync::atomic::Ordering;
|
||||
use tokio::sync::Mutex;
|
||||
use tokio::sync::Notify;
|
||||
use tokio::sync::mpsc;
|
||||
use tokio::task::JoinHandle;
|
||||
use tokio::time::Duration;
|
||||
use tokio::time::Instant;
|
||||
//! Unified Exec: interactive PTY execution orchestrated with approvals + sandboxing.
|
||||
//!
|
||||
//! Responsibilities
|
||||
//! - Manages interactive PTY sessions (create, reuse, buffer output with caps).
|
||||
//! - Uses the shared ToolOrchestrator to handle approval, sandbox selection, and
|
||||
//! retry semantics in a single, descriptive flow.
|
||||
//! - Spawns the PTY from a sandbox‑transformed `ExecEnv`; on sandbox denial,
|
||||
//! retries without sandbox when policy allows (no re‑prompt thanks to caching).
|
||||
//! - Uses the shared `is_likely_sandbox_denied` heuristic to keep denial messages
|
||||
//! consistent with other exec paths.
|
||||
//!
|
||||
//! Flow at a glance (open session)
|
||||
//! 1) Build a small request `{ command, cwd }`.
|
||||
//! 2) Orchestrator: approval (bypass/cache/prompt) → select sandbox → run.
|
||||
//! 3) Runtime: transform `CommandSpec` → `ExecEnv` → spawn PTY.
|
||||
//! 4) If denial, orchestrator retries with `SandboxType::None`.
|
||||
//! 5) Session is returned with streaming output + metadata.
|
||||
//!
|
||||
//! This keeps policy logic and user interaction centralized while the PTY/session
|
||||
//! concerns remain isolated here. The implementation is split between:
|
||||
//! - `session.rs`: PTY session lifecycle + output buffering.
|
||||
//! - `session_manager.rs`: orchestration (approvals, sandboxing, reuse) and request handling.
|
||||
|
||||
use crate::exec_command::ExecCommandSession;
|
||||
use crate::truncate::truncate_middle;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::atomic::AtomicI32;
|
||||
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
use crate::codex::Session;
|
||||
use crate::codex::TurnContext;
|
||||
|
||||
mod errors;
|
||||
mod session;
|
||||
mod session_manager;
|
||||
|
||||
pub(crate) use errors::UnifiedExecError;
|
||||
pub(crate) use session::UnifiedExecSession;
|
||||
|
||||
const DEFAULT_TIMEOUT_MS: u64 = 1_000;
|
||||
const MAX_TIMEOUT_MS: u64 = 60_000;
|
||||
const UNIFIED_EXEC_OUTPUT_MAX_BYTES: usize = 128 * 1024; // 128 KiB
|
||||
|
||||
pub(crate) struct UnifiedExecContext<'a> {
|
||||
pub session: &'a Session,
|
||||
pub turn: &'a TurnContext,
|
||||
pub sub_id: &'a str,
|
||||
pub call_id: &'a str,
|
||||
pub session_id: Option<i32>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct UnifiedExecRequest<'a> {
|
||||
pub session_id: Option<i32>,
|
||||
pub input_chunks: &'a [String],
|
||||
pub timeout_ms: Option<u64>,
|
||||
}
|
||||
@@ -44,379 +63,60 @@ pub(crate) struct UnifiedExecResult {
|
||||
#[derive(Debug, Default)]
pub(crate) struct UnifiedExecSessionManager {
    next_session_id: AtomicI32,
    sessions: Mutex<HashMap<i32, ManagedUnifiedExecSession>>,
}

#[derive(Debug)]
struct ManagedUnifiedExecSession {
    session: ExecCommandSession,
    output_buffer: OutputBuffer,
    /// Notifies waiters whenever new output has been appended to
    /// `output_buffer`, allowing clients to poll for fresh data.
    output_notify: Arc<Notify>,
    output_task: JoinHandle<()>,
}

#[derive(Debug, Default)]
struct OutputBufferState {
    chunks: VecDeque<Vec<u8>>,
    total_bytes: usize,
}

impl OutputBufferState {
    fn push_chunk(&mut self, chunk: Vec<u8>) {
        self.total_bytes = self.total_bytes.saturating_add(chunk.len());
        self.chunks.push_back(chunk);

        let mut excess = self
            .total_bytes
            .saturating_sub(UNIFIED_EXEC_OUTPUT_MAX_BYTES);

        while excess > 0 {
            match self.chunks.front_mut() {
                Some(front) if excess >= front.len() => {
                    excess -= front.len();
                    self.total_bytes = self.total_bytes.saturating_sub(front.len());
                    self.chunks.pop_front();
                }
                Some(front) => {
                    front.drain(..excess);
                    self.total_bytes = self.total_bytes.saturating_sub(excess);
                    break;
                }
                None => break,
            }
        }
    }

    fn drain(&mut self) -> Vec<Vec<u8>> {
        let drained: Vec<Vec<u8>> = self.chunks.drain(..).collect();
        self.total_bytes = 0;
        drained
    }
}

type OutputBuffer = Arc<Mutex<OutputBufferState>>;
type OutputHandles = (OutputBuffer, Arc<Notify>);

impl ManagedUnifiedExecSession {
    fn new(
        session: ExecCommandSession,
        initial_output_rx: tokio::sync::broadcast::Receiver<Vec<u8>>,
    ) -> Self {
        let output_buffer = Arc::new(Mutex::new(OutputBufferState::default()));
        let output_notify = Arc::new(Notify::new());
        let mut receiver = initial_output_rx;
        let buffer_clone = Arc::clone(&output_buffer);
        let notify_clone = Arc::clone(&output_notify);
        let output_task = tokio::spawn(async move {
            loop {
                match receiver.recv().await {
                    Ok(chunk) => {
                        let mut guard = buffer_clone.lock().await;
                        guard.push_chunk(chunk);
                        drop(guard);
                        notify_clone.notify_waiters();
                    }
                    // If we lag behind the broadcast buffer, skip missed
                    // messages but keep the task alive to continue streaming.
                    Err(tokio::sync::broadcast::error::RecvError::Lagged(_)) => {
                        continue;
                    }
                    // When the sender closes, exit the task.
                    Err(tokio::sync::broadcast::error::RecvError::Closed) => break,
                }
            }
        });

        Self {
            session,
            output_buffer,
            output_notify,
            output_task,
        }
    }

    fn writer_sender(&self) -> mpsc::Sender<Vec<u8>> {
        self.session.writer_sender()
    }

    fn output_handles(&self) -> OutputHandles {
        (
            Arc::clone(&self.output_buffer),
            Arc::clone(&self.output_notify),
        )
    }

    fn has_exited(&self) -> bool {
        self.session.has_exited()
    }
}

impl Drop for ManagedUnifiedExecSession {
    fn drop(&mut self) {
        self.output_task.abort();
    }
}

impl UnifiedExecSessionManager {
    pub async fn handle_request(
        &self,
        request: UnifiedExecRequest<'_>,
    ) -> Result<UnifiedExecResult, UnifiedExecError> {
        let (timeout_ms, timeout_warning) = match request.timeout_ms {
            Some(requested) if requested > MAX_TIMEOUT_MS => (
                MAX_TIMEOUT_MS,
                Some(format!(
                    "Warning: requested timeout {requested}ms exceeds maximum of {MAX_TIMEOUT_MS}ms; clamping to {MAX_TIMEOUT_MS}ms.\n"
                )),
            ),
            Some(requested) => (requested, None),
            None => (DEFAULT_TIMEOUT_MS, None),
        };

        let mut new_session: Option<ManagedUnifiedExecSession> = None;
        let session_id;
        let writer_tx;
        let output_buffer;
        let output_notify;

        if let Some(existing_id) = request.session_id {
            let mut sessions = self.sessions.lock().await;
            match sessions.get(&existing_id) {
                Some(session) => {
                    if session.has_exited() {
                        sessions.remove(&existing_id);
                        return Err(UnifiedExecError::UnknownSessionId {
                            session_id: existing_id,
                        });
                    }
                    let (buffer, notify) = session.output_handles();
                    session_id = existing_id;
                    writer_tx = session.writer_sender();
                    output_buffer = buffer;
                    output_notify = notify;
                }
                None => {
                    return Err(UnifiedExecError::UnknownSessionId {
                        session_id: existing_id,
                    });
                }
            }
            drop(sessions);
        } else {
            let command = request.input_chunks.to_vec();
            let new_id = self.next_session_id.fetch_add(1, Ordering::SeqCst);
            let (session, initial_output_rx) = create_unified_exec_session(&command).await?;
            let managed_session = ManagedUnifiedExecSession::new(session, initial_output_rx);
            let (buffer, notify) = managed_session.output_handles();
            writer_tx = managed_session.writer_sender();
            output_buffer = buffer;
            output_notify = notify;
            session_id = new_id;
            new_session = Some(managed_session);
        };

        if request.session_id.is_some() {
            let joined_input = request.input_chunks.join(" ");
            if !joined_input.is_empty() && writer_tx.send(joined_input.into_bytes()).await.is_err()
            {
                return Err(UnifiedExecError::WriteToStdin);
            }
        }

        let mut collected: Vec<u8> = Vec::with_capacity(4096);
        let start = Instant::now();
        let deadline = start + Duration::from_millis(timeout_ms);

        loop {
            let drained_chunks;
            let mut wait_for_output = None;
            {
                let mut guard = output_buffer.lock().await;
                drained_chunks = guard.drain();
                if drained_chunks.is_empty() {
                    wait_for_output = Some(output_notify.notified());
                }
            }

            if drained_chunks.is_empty() {
                let remaining = deadline.saturating_duration_since(Instant::now());
                if remaining == Duration::ZERO {
                    break;
                }

                let notified = wait_for_output.unwrap_or_else(|| output_notify.notified());
                tokio::pin!(notified);
                tokio::select! {
                    _ = &mut notified => {}
                    _ = tokio::time::sleep(remaining) => break,
                }
                continue;
            }

            for chunk in drained_chunks {
                collected.extend_from_slice(&chunk);
            }

            if Instant::now() >= deadline {
                break;
            }
        }

        let (output, _maybe_tokens) = truncate_middle(
            &String::from_utf8_lossy(&collected),
            UNIFIED_EXEC_OUTPUT_MAX_BYTES,
        );
        let output = if let Some(warning) = timeout_warning {
            format!("{warning}{output}")
        } else {
            output
        };

        let should_store_session = if let Some(session) = new_session.as_ref() {
            !session.has_exited()
        } else if request.session_id.is_some() {
            let mut sessions = self.sessions.lock().await;
            if let Some(existing) = sessions.get(&session_id) {
                if existing.has_exited() {
                    sessions.remove(&session_id);
                    false
                } else {
                    true
                }
            } else {
                false
            }
        } else {
            true
        };

        if should_store_session {
            if let Some(session) = new_session {
                self.sessions.lock().await.insert(session_id, session);
            }
            Ok(UnifiedExecResult {
                session_id: Some(session_id),
                output,
            })
        } else {
            Ok(UnifiedExecResult {
                session_id: None,
                output,
            })
        }
    }
}

async fn create_unified_exec_session(
    command: &[String],
) -> Result<
    (
        ExecCommandSession,
        tokio::sync::broadcast::Receiver<Vec<u8>>,
    ),
    UnifiedExecError,
> {
    if command.is_empty() {
        return Err(UnifiedExecError::MissingCommandLine);
    }

    let pty_system = native_pty_system();

    let pair = pty_system
        .openpty(PtySize {
            rows: 24,
            cols: 80,
            pixel_width: 0,
            pixel_height: 0,
        })
        .map_err(UnifiedExecError::create_session)?;

    // Safe thanks to the check at the top of the function.
    let mut command_builder = CommandBuilder::new(command[0].clone());
    for arg in &command[1..] {
        command_builder.arg(arg);
    }

    let mut child = pair
        .slave
        .spawn_command(command_builder)
        .map_err(UnifiedExecError::create_session)?;
    let killer = child.clone_killer();

    let (writer_tx, mut writer_rx) = mpsc::channel::<Vec<u8>>(128);
    let (output_tx, _) = tokio::sync::broadcast::channel::<Vec<u8>>(256);

    let mut reader = pair
        .master
        .try_clone_reader()
        .map_err(UnifiedExecError::create_session)?;
    let output_tx_clone = output_tx.clone();
    let reader_handle = tokio::task::spawn_blocking(move || {
        let mut buf = [0u8; 8192];
        loop {
            match reader.read(&mut buf) {
                Ok(0) => break,
                Ok(n) => {
                    let _ = output_tx_clone.send(buf[..n].to_vec());
                }
                Err(ref e) if e.kind() == ErrorKind::Interrupted => continue,
                Err(ref e) if e.kind() == ErrorKind::WouldBlock => {
                    std::thread::sleep(Duration::from_millis(5));
                    continue;
                }
                Err(_) => break,
            }
        }
    });

    let writer = pair
        .master
        .take_writer()
        .map_err(UnifiedExecError::create_session)?;
    let writer = Arc::new(StdMutex::new(writer));
    let writer_handle = tokio::spawn({
        let writer = writer.clone();
        async move {
            while let Some(bytes) = writer_rx.recv().await {
                let writer = writer.clone();
                let _ = tokio::task::spawn_blocking(move || {
                    if let Ok(mut guard) = writer.lock() {
                        use std::io::Write;
                        let _ = guard.write_all(&bytes);
                        let _ = guard.flush();
                    }
                })
                .await;
            }
        }
    });

    let exit_status = Arc::new(AtomicBool::new(false));
    let wait_exit_status = Arc::clone(&exit_status);
    let wait_handle = tokio::task::spawn_blocking(move || {
        let _ = child.wait();
        wait_exit_status.store(true, Ordering::SeqCst);
    });

    let (session, initial_output_rx) = ExecCommandSession::new(
        writer_tx,
        output_tx,
        killer,
        reader_handle,
        writer_handle,
        wait_handle,
        exit_status,
    );
    Ok((session, initial_output_rx))
    sessions: Mutex<HashMap<i32, session::UnifiedExecSession>>,
}

#[cfg(test)]
#[cfg(unix)]
mod tests {
    use super::*;
    #[cfg(unix)]

    use crate::codex::Session;
    use crate::codex::TurnContext;
    use crate::codex::make_session_and_context;
    use crate::protocol::AskForApproval;
    use crate::protocol::SandboxPolicy;
    use core_test_support::skip_if_sandbox;
    use std::sync::Arc;
    use tokio::time::Duration;

    use super::session::OutputBufferState;

    fn test_session_and_turn() -> (Arc<Session>, Arc<TurnContext>) {
        let (session, mut turn) = make_session_and_context();
        turn.approval_policy = AskForApproval::Never;
        turn.sandbox_policy = SandboxPolicy::DangerFullAccess;
        (Arc::new(session), Arc::new(turn))
    }

    async fn run_unified_exec_request(
        session: &Arc<Session>,
        turn: &Arc<TurnContext>,
        session_id: Option<i32>,
        input: Vec<String>,
        timeout_ms: Option<u64>,
    ) -> Result<UnifiedExecResult, UnifiedExecError> {
        let request_input = input;
        let request = UnifiedExecRequest {
            input_chunks: &request_input,
            timeout_ms,
        };

        session
            .services
            .unified_exec_manager
            .handle_request(
                request,
                UnifiedExecContext {
                    session,
                    turn: turn.as_ref(),
                    sub_id: "sub",
                    call_id: "call",
                    session_id,
                },
            )
            .await
    }

    #[test]
    fn push_chunk_trims_only_excess_bytes() {
@@ -426,167 +126,170 @@ mod tests {
        buffer.push_chunk(vec![b'c']);

        assert_eq!(buffer.total_bytes, UNIFIED_EXEC_OUTPUT_MAX_BYTES);
        assert_eq!(buffer.chunks.len(), 3);
        let snapshot = buffer.snapshot();
        assert_eq!(snapshot.len(), 3);
        assert_eq!(
            buffer.chunks.front().unwrap().len(),
            snapshot.first().unwrap().len(),
            UNIFIED_EXEC_OUTPUT_MAX_BYTES - 2
        );
        assert_eq!(buffer.chunks.pop_back().unwrap(), vec![b'c']);
        assert_eq!(buffer.chunks.pop_back().unwrap(), vec![b'b']);
        assert_eq!(snapshot.get(2).unwrap(), &vec![b'c']);
        assert_eq!(snapshot.get(1).unwrap(), &vec![b'b']);
    }

    #[cfg(unix)]
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn unified_exec_persists_across_requests() -> Result<(), UnifiedExecError> {
    async fn unified_exec_persists_across_requests() -> anyhow::Result<()> {
        skip_if_sandbox!(Ok(()));

        let manager = UnifiedExecSessionManager::default();
        let (session, turn) = test_session_and_turn();

        let open_shell = manager
            .handle_request(UnifiedExecRequest {
                session_id: None,
                input_chunks: &["bash".to_string(), "-i".to_string()],
                timeout_ms: Some(2_500),
            })
            .await?;
        let open_shell = run_unified_exec_request(
            &session,
            &turn,
            None,
            vec!["bash".to_string(), "-i".to_string()],
            Some(2_500),
        )
        .await?;
        let session_id = open_shell.session_id.expect("expected session_id");

        manager
            .handle_request(UnifiedExecRequest {
                session_id: Some(session_id),
                input_chunks: &[
                    "export".to_string(),
                    "CODEX_INTERACTIVE_SHELL_VAR=codex\n".to_string(),
                ],
                timeout_ms: Some(2_500),
            })
            .await?;
        run_unified_exec_request(
            &session,
            &turn,
            Some(session_id),
            vec![
                "export".to_string(),
                "CODEX_INTERACTIVE_SHELL_VAR=codex\n".to_string(),
            ],
            Some(2_500),
        )
        .await?;

        let out_2 = manager
            .handle_request(UnifiedExecRequest {
                session_id: Some(session_id),
                input_chunks: &["echo $CODEX_INTERACTIVE_SHELL_VAR\n".to_string()],
                timeout_ms: Some(2_500),
            })
            .await?;
        let out_2 = run_unified_exec_request(
            &session,
            &turn,
            Some(session_id),
            vec!["echo $CODEX_INTERACTIVE_SHELL_VAR\n".to_string()],
            Some(2_500),
        )
        .await?;
        assert!(out_2.output.contains("codex"));

        Ok(())
    }

    #[cfg(unix)]
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn multi_unified_exec_sessions() -> Result<(), UnifiedExecError> {
    async fn multi_unified_exec_sessions() -> anyhow::Result<()> {
        skip_if_sandbox!(Ok(()));

        let manager = UnifiedExecSessionManager::default();
        let (session, turn) = test_session_and_turn();

        let shell_a = manager
            .handle_request(UnifiedExecRequest {
                session_id: None,
                input_chunks: &["/bin/bash".to_string(), "-i".to_string()],
                timeout_ms: Some(2_500),
            })
            .await?;
        let shell_a = run_unified_exec_request(
            &session,
            &turn,
            None,
            vec!["/bin/bash".to_string(), "-i".to_string()],
            Some(2_500),
        )
        .await?;
        let session_a = shell_a.session_id.expect("expected session id");

        manager
            .handle_request(UnifiedExecRequest {
                session_id: Some(session_a),
                input_chunks: &["export CODEX_INTERACTIVE_SHELL_VAR=codex\n".to_string()],
                timeout_ms: Some(2_500),
            })
            .await?;
        run_unified_exec_request(
            &session,
            &turn,
            Some(session_a),
            vec!["export CODEX_INTERACTIVE_SHELL_VAR=codex\n".to_string()],
            Some(2_500),
        )
        .await?;

        let out_2 = manager
            .handle_request(UnifiedExecRequest {
                session_id: None,
                input_chunks: &[
                    "echo".to_string(),
                    "$CODEX_INTERACTIVE_SHELL_VAR\n".to_string(),
                ],
                timeout_ms: Some(2_500),
            })
            .await?;
        let out_2 = run_unified_exec_request(
            &session,
            &turn,
            None,
            vec![
                "echo".to_string(),
                "$CODEX_INTERACTIVE_SHELL_VAR\n".to_string(),
            ],
            Some(2_500),
        )
        .await?;
        assert!(!out_2.output.contains("codex"));

        let out_3 = manager
            .handle_request(UnifiedExecRequest {
                session_id: Some(session_a),
                input_chunks: &["echo $CODEX_INTERACTIVE_SHELL_VAR\n".to_string()],
                timeout_ms: Some(2_500),
            })
            .await?;
        let out_3 = run_unified_exec_request(
            &session,
            &turn,
            Some(session_a),
            vec!["echo $CODEX_INTERACTIVE_SHELL_VAR\n".to_string()],
            Some(2_500),
        )
        .await?;
        assert!(out_3.output.contains("codex"));

        Ok(())
    }

    #[cfg(unix)]
    #[tokio::test]
    async fn unified_exec_timeouts() -> Result<(), UnifiedExecError> {
    async fn unified_exec_timeouts() -> anyhow::Result<()> {
        skip_if_sandbox!(Ok(()));

        let manager = UnifiedExecSessionManager::default();
        let (session, turn) = test_session_and_turn();

        let open_shell = manager
            .handle_request(UnifiedExecRequest {
                session_id: None,
                input_chunks: &["bash".to_string(), "-i".to_string()],
                timeout_ms: Some(2_500),
            })
            .await?;
        let open_shell = run_unified_exec_request(
            &session,
            &turn,
            None,
            vec!["bash".to_string(), "-i".to_string()],
            Some(2_500),
        )
        .await?;
        let session_id = open_shell.session_id.expect("expected session id");

        manager
            .handle_request(UnifiedExecRequest {
                session_id: Some(session_id),
                input_chunks: &[
                    "export".to_string(),
                    "CODEX_INTERACTIVE_SHELL_VAR=codex\n".to_string(),
                ],
                timeout_ms: Some(2_500),
            })
            .await?;
        run_unified_exec_request(
            &session,
            &turn,
            Some(session_id),
            vec![
                "export".to_string(),
                "CODEX_INTERACTIVE_SHELL_VAR=codex\n".to_string(),
            ],
            Some(2_500),
        )
        .await?;

        let out_2 = manager
            .handle_request(UnifiedExecRequest {
                session_id: Some(session_id),
                input_chunks: &["sleep 5 && echo $CODEX_INTERACTIVE_SHELL_VAR\n".to_string()],
                timeout_ms: Some(10),
            })
            .await?;
        let out_2 = run_unified_exec_request(
            &session,
            &turn,
            Some(session_id),
            vec!["sleep 5 && echo $CODEX_INTERACTIVE_SHELL_VAR\n".to_string()],
            Some(10),
        )
        .await?;
        assert!(!out_2.output.contains("codex"));

        tokio::time::sleep(Duration::from_secs(7)).await;

        let empty = Vec::new();
        let out_3 = manager
            .handle_request(UnifiedExecRequest {
                session_id: Some(session_id),
                input_chunks: &empty,
                timeout_ms: Some(100),
            })
            .await?;
        let out_3 =
            run_unified_exec_request(&session, &turn, Some(session_id), Vec::new(), Some(100))
                .await?;

        assert!(out_3.output.contains("codex"));

        Ok(())
    }

    #[cfg(unix)]
    #[tokio::test]
    #[ignore] // Ignored until we have a better way to test this.
    async fn requests_with_large_timeout_are_capped() -> Result<(), UnifiedExecError> {
        let manager = UnifiedExecSessionManager::default();
    async fn requests_with_large_timeout_are_capped() -> anyhow::Result<()> {
        let (session, turn) = test_session_and_turn();

        let result = manager
            .handle_request(UnifiedExecRequest {
                session_id: None,
                input_chunks: &["echo".to_string(), "codex".to_string()],
                timeout_ms: Some(120_000),
            })
            .await?;
        let result = run_unified_exec_request(
            &session,
            &turn,
            None,
            vec!["echo".to_string(), "codex".to_string()],
            Some(120_000),
        )
        .await?;

        assert!(result.output.starts_with(
            "Warning: requested timeout 120000ms exceeds maximum of 60000ms; clamping to 60000ms.\n"
@@ -596,61 +299,66 @@ mod tests {
        Ok(())
    }

    #[cfg(unix)]
    #[tokio::test]
    #[ignore] // Ignored until we have a better way to test this.
    async fn completed_commands_do_not_persist_sessions() -> Result<(), UnifiedExecError> {
        let manager = UnifiedExecSessionManager::default();
        let result = manager
            .handle_request(UnifiedExecRequest {
                session_id: None,
                input_chunks: &["/bin/echo".to_string(), "codex".to_string()],
                timeout_ms: Some(2_500),
            })
            .await?;
    async fn completed_commands_do_not_persist_sessions() -> anyhow::Result<()> {
        let (session, turn) = test_session_and_turn();
        let result = run_unified_exec_request(
            &session,
            &turn,
            None,
            vec!["/bin/echo".to_string(), "codex".to_string()],
            Some(2_500),
        )
        .await?;

        assert!(result.session_id.is_none());
        assert!(result.output.contains("codex"));

        assert!(manager.sessions.lock().await.is_empty());
        assert!(
            session
                .services
                .unified_exec_manager
                .sessions
                .lock()
                .await
                .is_empty()
        );

        Ok(())
    }

    #[cfg(unix)]
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn reusing_completed_session_returns_unknown_session() -> Result<(), UnifiedExecError> {
    async fn reusing_completed_session_returns_unknown_session() -> anyhow::Result<()> {
        skip_if_sandbox!(Ok(()));

        let manager = UnifiedExecSessionManager::default();
        let (session, turn) = test_session_and_turn();

        let open_shell = manager
            .handle_request(UnifiedExecRequest {
                session_id: None,
                input_chunks: &["/bin/bash".to_string(), "-i".to_string()],
                timeout_ms: Some(2_500),
            })
            .await?;
        let open_shell = run_unified_exec_request(
            &session,
            &turn,
            None,
            vec!["/bin/bash".to_string(), "-i".to_string()],
            Some(2_500),
        )
        .await?;
        let session_id = open_shell.session_id.expect("expected session id");

        manager
            .handle_request(UnifiedExecRequest {
                session_id: Some(session_id),
                input_chunks: &["exit\n".to_string()],
                timeout_ms: Some(2_500),
            })
            .await?;
        run_unified_exec_request(
            &session,
            &turn,
            Some(session_id),
            vec!["exit\n".to_string()],
            Some(2_500),
        )
        .await?;

        tokio::time::sleep(Duration::from_millis(200)).await;

        let err = manager
            .handle_request(UnifiedExecRequest {
                session_id: Some(session_id),
                input_chunks: &[],
                timeout_ms: Some(100),
            })
            .await
            .expect_err("expected unknown session error");
        let err =
            run_unified_exec_request(&session, &turn, Some(session_id), Vec::new(), Some(100))
                .await
                .expect_err("expected unknown session error");

        match err {
            UnifiedExecError::UnknownSessionId { session_id: err_id } => {
@@ -659,7 +367,15 @@ mod tests {
            other => panic!("expected UnknownSessionId, got {other:?}"),
        }

        assert!(!manager.sessions.lock().await.contains_key(&session_id));
        assert!(
            !session
                .services
                .unified_exec_manager
                .sessions
                .lock()
                .await
                .contains_key(&session_id)
        );

        Ok(())
    }

217 codex-rs/core/src/unified_exec/session.rs Normal file
@@ -0,0 +1,217 @@
#![allow(clippy::module_inception)]

use std::collections::VecDeque;
use std::sync::Arc;

use tokio::sync::Mutex;
use tokio::sync::Notify;
use tokio::sync::mpsc;
use tokio::sync::oneshot::error::TryRecvError;
use tokio::task::JoinHandle;
use tokio::time::Duration;

use crate::exec::ExecToolCallOutput;
use crate::exec::SandboxType;
use crate::exec::StreamOutput;
use crate::exec::is_likely_sandbox_denied;
use crate::truncate::truncate_middle;
use codex_utils_pty::ExecCommandSession;
use codex_utils_pty::SpawnedPty;

use super::UNIFIED_EXEC_OUTPUT_MAX_BYTES;
use super::UnifiedExecError;

#[derive(Debug, Default)]
pub(crate) struct OutputBufferState {
    chunks: VecDeque<Vec<u8>>,
    pub(crate) total_bytes: usize,
}

impl OutputBufferState {
    pub(super) fn push_chunk(&mut self, chunk: Vec<u8>) {
        self.total_bytes = self.total_bytes.saturating_add(chunk.len());
        self.chunks.push_back(chunk);

        let mut excess = self
            .total_bytes
            .saturating_sub(UNIFIED_EXEC_OUTPUT_MAX_BYTES);

        while excess > 0 {
            match self.chunks.front_mut() {
                Some(front) if excess >= front.len() => {
                    excess -= front.len();
                    self.total_bytes = self.total_bytes.saturating_sub(front.len());
                    self.chunks.pop_front();
                }
                Some(front) => {
                    front.drain(..excess);
                    self.total_bytes = self.total_bytes.saturating_sub(excess);
                    break;
                }
                None => break,
            }
        }
    }

    pub(super) fn drain(&mut self) -> Vec<Vec<u8>> {
        let drained: Vec<Vec<u8>> = self.chunks.drain(..).collect();
        self.total_bytes = 0;
        drained
    }

    pub(super) fn snapshot(&self) -> Vec<Vec<u8>> {
        self.chunks.iter().cloned().collect()
    }
}

pub(crate) type OutputBuffer = Arc<Mutex<OutputBufferState>>;
pub(crate) type OutputHandles = (OutputBuffer, Arc<Notify>);

#[derive(Debug)]
pub(crate) struct UnifiedExecSession {
    session: ExecCommandSession,
    output_buffer: OutputBuffer,
    output_notify: Arc<Notify>,
    output_task: JoinHandle<()>,
    sandbox_type: SandboxType,
}

impl UnifiedExecSession {
    pub(super) fn new(
        session: ExecCommandSession,
        initial_output_rx: tokio::sync::broadcast::Receiver<Vec<u8>>,
        sandbox_type: SandboxType,
    ) -> Self {
        let output_buffer = Arc::new(Mutex::new(OutputBufferState::default()));
        let output_notify = Arc::new(Notify::new());
        let mut receiver = initial_output_rx;
        let buffer_clone = Arc::clone(&output_buffer);
        let notify_clone = Arc::clone(&output_notify);
        let output_task = tokio::spawn(async move {
            loop {
                match receiver.recv().await {
                    Ok(chunk) => {
                        let mut guard = buffer_clone.lock().await;
                        guard.push_chunk(chunk);
                        drop(guard);
                        notify_clone.notify_waiters();
                    }
                    Err(tokio::sync::broadcast::error::RecvError::Lagged(_)) => continue,
                    Err(tokio::sync::broadcast::error::RecvError::Closed) => break,
                }
            }
        });

        Self {
            session,
            output_buffer,
            output_notify,
            output_task,
            sandbox_type,
        }
    }

    pub(super) fn writer_sender(&self) -> mpsc::Sender<Vec<u8>> {
        self.session.writer_sender()
    }

    pub(super) fn output_handles(&self) -> OutputHandles {
        (
            Arc::clone(&self.output_buffer),
            Arc::clone(&self.output_notify),
        )
    }

    pub(super) fn has_exited(&self) -> bool {
        self.session.has_exited()
    }

    pub(super) fn exit_code(&self) -> Option<i32> {
        self.session.exit_code()
    }

    async fn snapshot_output(&self) -> Vec<Vec<u8>> {
        let guard = self.output_buffer.lock().await;
        guard.snapshot()
    }

    fn sandbox_type(&self) -> SandboxType {
        self.sandbox_type
    }

    pub(super) async fn check_for_sandbox_denial(&self) -> Result<(), UnifiedExecError> {
        if self.sandbox_type() == SandboxType::None || !self.has_exited() {
            return Ok(());
        }

        let _ =
            tokio::time::timeout(Duration::from_millis(20), self.output_notify.notified()).await;

        let collected_chunks = self.snapshot_output().await;
        let mut aggregated: Vec<u8> = Vec::new();
        for chunk in collected_chunks {
            aggregated.extend_from_slice(&chunk);
        }
        let aggregated_text = String::from_utf8_lossy(&aggregated).to_string();
        let exit_code = self.exit_code().unwrap_or(-1);

        let exec_output = ExecToolCallOutput {
            exit_code,
            stdout: StreamOutput::new(aggregated_text.clone()),
            stderr: StreamOutput::new(String::new()),
            aggregated_output: StreamOutput::new(aggregated_text.clone()),
            duration: Duration::ZERO,
            timed_out: false,
        };

        if is_likely_sandbox_denied(self.sandbox_type(), &exec_output) {
            let (snippet, _) = truncate_middle(&aggregated_text, UNIFIED_EXEC_OUTPUT_MAX_BYTES);
            let message = if snippet.is_empty() {
                format!("exit code {exit_code}")
            } else {
                snippet
            };
            return Err(UnifiedExecError::sandbox_denied(message, exec_output));
        }

        Ok(())
    }

    pub(super) async fn from_spawned(
        spawned: SpawnedPty,
        sandbox_type: SandboxType,
    ) -> Result<Self, UnifiedExecError> {
        let SpawnedPty {
            session,
            output_rx,
            mut exit_rx,
        } = spawned;
        let managed = Self::new(session, output_rx, sandbox_type);

        let exit_ready = match exit_rx.try_recv() {
            Ok(_) | Err(TryRecvError::Closed) => true,
            Err(TryRecvError::Empty) => false,
        };

        if exit_ready {
            managed.check_for_sandbox_denial().await?;
            return Ok(managed);
        }

        tokio::pin!(exit_rx);
        if tokio::time::timeout(Duration::from_millis(50), &mut exit_rx)
            .await
            .is_ok()
        {
            managed.check_for_sandbox_denial().await?;
        }

        Ok(managed)
    }
}

impl Drop for UnifiedExecSession {
    fn drop(&mut self) {
        self.output_task.abort();
    }
}
288 codex-rs/core/src/unified_exec/session_manager.rs Normal file
@@ -0,0 +1,288 @@
use std::sync::Arc;

use tokio::sync::Notify;
use tokio::sync::mpsc;
use tokio::time::Duration;
use tokio::time::Instant;

use crate::exec_env::create_env;
use crate::sandboxing::ExecEnv;
use crate::tools::orchestrator::ToolOrchestrator;
use crate::tools::runtimes::unified_exec::UnifiedExecRequest as UnifiedExecToolRequest;
use crate::tools::runtimes::unified_exec::UnifiedExecRuntime;
use crate::tools::sandboxing::ToolCtx;
use crate::truncate::truncate_middle;

use super::DEFAULT_TIMEOUT_MS;
use super::MAX_TIMEOUT_MS;
use super::UNIFIED_EXEC_OUTPUT_MAX_BYTES;
use super::UnifiedExecContext;
use super::UnifiedExecError;
use super::UnifiedExecRequest;
use super::UnifiedExecResult;
use super::UnifiedExecSessionManager;
use super::session::OutputBuffer;
use super::session::UnifiedExecSession;

pub(super) struct SessionAcquisition {
    pub(super) session_id: i32,
    pub(super) writer_tx: mpsc::Sender<Vec<u8>>,
    pub(super) output_buffer: OutputBuffer,
    pub(super) output_notify: Arc<Notify>,
    pub(super) new_session: Option<UnifiedExecSession>,
    pub(super) reuse_requested: bool,
}

impl UnifiedExecSessionManager {
    pub(super) async fn acquire_session(
        &self,
        request: &UnifiedExecRequest<'_>,
        context: &UnifiedExecContext<'_>,
    ) -> Result<SessionAcquisition, UnifiedExecError> {
        if let Some(existing_id) = context.session_id {
            let mut sessions = self.sessions.lock().await;
            match sessions.get(&existing_id) {
                Some(session) => {
                    if session.has_exited() {
                        sessions.remove(&existing_id);
                        return Err(UnifiedExecError::UnknownSessionId {
                            session_id: existing_id,
                        });
                    }
                    let (buffer, notify) = session.output_handles();
                    let writer_tx = session.writer_sender();
                    Ok(SessionAcquisition {
                        session_id: existing_id,
                        writer_tx,
                        output_buffer: buffer,
                        output_notify: notify,
                        new_session: None,
                        reuse_requested: true,
                    })
                }
                None => Err(UnifiedExecError::UnknownSessionId {
                    session_id: existing_id,
                }),
            }
        } else {
            let new_id = self
                .next_session_id
                .fetch_add(1, std::sync::atomic::Ordering::SeqCst);
            let managed_session = self
                .open_session_with_sandbox(request.input_chunks.to_vec(), context)
                .await?;
            let (buffer, notify) = managed_session.output_handles();
            let writer_tx = managed_session.writer_sender();
            Ok(SessionAcquisition {
                session_id: new_id,
                writer_tx,
                output_buffer: buffer,
                output_notify: notify,
                new_session: Some(managed_session),
                reuse_requested: false,
            })
        }
    }

    pub(crate) async fn open_session_with_exec_env(
        &self,
        env: &ExecEnv,
    ) -> Result<UnifiedExecSession, UnifiedExecError> {
        let (program, args) = env
            .command
            .split_first()
            .ok_or(UnifiedExecError::MissingCommandLine)?;
        let spawned =
            codex_utils_pty::spawn_pty_process(program, args, env.cwd.as_path(), &env.env)
                .await
                .map_err(|err| UnifiedExecError::create_session(err.to_string()))?;
        UnifiedExecSession::from_spawned(spawned, env.sandbox).await
    }

    pub(super) async fn open_session_with_sandbox(
        &self,
        command: Vec<String>,
        context: &UnifiedExecContext<'_>,
    ) -> Result<UnifiedExecSession, UnifiedExecError> {
        let mut orchestrator = ToolOrchestrator::new();
        let mut runtime = UnifiedExecRuntime::new(self);
        let req = UnifiedExecToolRequest::new(
            command,
            context.turn.cwd.clone(),
            create_env(&context.turn.shell_environment_policy),
        );
        let tool_ctx = ToolCtx {
            session: context.session,
            sub_id: context.sub_id.to_string(),
            call_id: context.call_id.to_string(),
            tool_name: "unified_exec".to_string(),
        };
        orchestrator
            .run(
                &mut runtime,
                &req,
                &tool_ctx,
                context.turn,
                context.turn.approval_policy,
            )
            .await
            .map_err(|e| UnifiedExecError::create_session(format!("{e:?}")))
    }

    pub(super) async fn collect_output_until_deadline(
        output_buffer: &OutputBuffer,
        output_notify: &Arc<Notify>,
        deadline: Instant,
    ) -> Vec<u8> {
        let mut collected: Vec<u8> = Vec::with_capacity(4096);
        loop {
            let drained_chunks;
            let mut wait_for_output = None;
            {
                let mut guard = output_buffer.lock().await;
                drained_chunks = guard.drain();
                if drained_chunks.is_empty() {
                    wait_for_output = Some(output_notify.notified());
                }
            }

            if drained_chunks.is_empty() {
                let remaining = deadline.saturating_duration_since(Instant::now());
                if remaining == Duration::ZERO {
                    break;
                }

                let notified = wait_for_output.unwrap_or_else(|| output_notify.notified());
                tokio::pin!(notified);
                tokio::select! {
                    _ = &mut notified => {}
                    _ = tokio::time::sleep(remaining) => break,
                }
                continue;
            }

            for chunk in drained_chunks {
                collected.extend_from_slice(&chunk);
            }

            if Instant::now() >= deadline {
                break;
            }
        }

        collected
    }

    pub(super) async fn should_store_session(&self, acquisition: &SessionAcquisition) -> bool {
        if let Some(session) = acquisition.new_session.as_ref() {
            !session.has_exited()
        } else if acquisition.reuse_requested {
            let mut sessions = self.sessions.lock().await;
            if let Some(existing) = sessions.get(&acquisition.session_id) {
                if existing.has_exited() {
                    sessions.remove(&acquisition.session_id);
                    false
                } else {
                    true
                }
            } else {
                false
            }
        } else {
            true
        }
    }

    pub(super) async fn send_input_chunks(
        writer_tx: &mpsc::Sender<Vec<u8>>,
        chunks: &[String],
    ) -> Result<(), UnifiedExecError> {
        let mut trailing_whitespace = true;
        for chunk in chunks {
            if chunk.is_empty() {
                continue;
            }

            let leading_whitespace = chunk
                .chars()
                .next()
                .map(char::is_whitespace)
                .unwrap_or(true);

            if !trailing_whitespace
                && !leading_whitespace
                && writer_tx.send(vec![b' ']).await.is_err()
            {
                return Err(UnifiedExecError::WriteToStdin);
            }

            if writer_tx.send(chunk.as_bytes().to_vec()).await.is_err() {
                return Err(UnifiedExecError::WriteToStdin);
            }

            trailing_whitespace = chunk
                .chars()
                .next_back()
                .map(char::is_whitespace)
                .unwrap_or(trailing_whitespace);
        }

        Ok(())
    }

    pub async fn handle_request(
        &self,
        request: UnifiedExecRequest<'_>,
        context: UnifiedExecContext<'_>,
    ) -> Result<UnifiedExecResult, UnifiedExecError> {
        let (timeout_ms, timeout_warning) = match request.timeout_ms {
            Some(requested) if requested > MAX_TIMEOUT_MS => (
                MAX_TIMEOUT_MS,
                Some(format!(
                    "Warning: requested timeout {requested}ms exceeds maximum of {MAX_TIMEOUT_MS}ms; clamping to {MAX_TIMEOUT_MS}ms.\n"
                )),
            ),
            Some(requested) => (requested, None),
            None => (DEFAULT_TIMEOUT_MS, None),
        };

        let mut acquisition = self.acquire_session(&request, &context).await?;

        if acquisition.reuse_requested {
            Self::send_input_chunks(&acquisition.writer_tx, request.input_chunks).await?;
        }

        let deadline = Instant::now() + Duration::from_millis(timeout_ms);
        let collected = Self::collect_output_until_deadline(
            &acquisition.output_buffer,
            &acquisition.output_notify,
            deadline,
        )
        .await;

        let (output, _maybe_tokens) = truncate_middle(
            &String::from_utf8_lossy(&collected),
            UNIFIED_EXEC_OUTPUT_MAX_BYTES,
        );
        let output = if let Some(warning) = timeout_warning {
            format!("{warning}{output}")
        } else {
            output
        };

        let should_store_session = self.should_store_session(&acquisition).await;
        let session_id = if should_store_session {
            if let Some(session) = acquisition.new_session.take() {
                self.sessions
                    .lock()
                    .await
                    .insert(acquisition.session_id, session);
            }
            Some(acquisition.session_id)
        } else {
            None
        };

        Ok(UnifiedExecResult { session_id, output })
    }
}