chore: rework tools execution workflow (#5278)
Re-work the tool execution flow. Read `orchestrator.rs` to understand the structure
This commit is contained in:
@@ -232,6 +232,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
#[allow(dead_code)]
|
||||
pub(crate) struct ExecCommandContext {
|
||||
pub(crate) sub_id: String,
|
||||
pub(crate) call_id: String,
|
||||
@@ -243,6 +244,7 @@ pub(crate) struct ExecCommandContext {
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
#[allow(dead_code)]
|
||||
pub(crate) struct ApplyPatchCommandContext {
|
||||
pub(crate) user_explicitly_approved_this_action: bool,
|
||||
pub(crate) changes: HashMap<PathBuf, FileChange>,
|
||||
|
||||
235
codex-rs/core/src/tools/events.rs
Normal file
235
codex-rs/core/src/tools/events.rs
Normal file
@@ -0,0 +1,235 @@
|
||||
use crate::codex::Session;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::parse_command::parse_command;
|
||||
use crate::protocol::Event;
|
||||
use crate::protocol::EventMsg;
|
||||
use crate::protocol::ExecCommandBeginEvent;
|
||||
use crate::protocol::ExecCommandEndEvent;
|
||||
use crate::protocol::FileChange;
|
||||
use crate::protocol::PatchApplyBeginEvent;
|
||||
use crate::protocol::PatchApplyEndEvent;
|
||||
use crate::protocol::TurnDiffEvent;
|
||||
use crate::tools::context::SharedTurnDiffTracker;
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
use std::time::Duration;
|
||||
|
||||
use super::format_exec_output;
|
||||
use super::format_exec_output_str;
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub(crate) struct ToolEventCtx<'a> {
|
||||
pub session: &'a Session,
|
||||
pub sub_id: &'a str,
|
||||
pub call_id: &'a str,
|
||||
pub turn_diff_tracker: Option<&'a SharedTurnDiffTracker>,
|
||||
}
|
||||
|
||||
impl<'a> ToolEventCtx<'a> {
|
||||
pub fn new(
|
||||
session: &'a Session,
|
||||
sub_id: &'a str,
|
||||
call_id: &'a str,
|
||||
turn_diff_tracker: Option<&'a SharedTurnDiffTracker>,
|
||||
) -> Self {
|
||||
Self {
|
||||
session,
|
||||
sub_id,
|
||||
call_id,
|
||||
turn_diff_tracker,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) enum ToolEventStage {
|
||||
Begin,
|
||||
Success(ExecToolCallOutput),
|
||||
Failure(ToolEventFailure),
|
||||
}
|
||||
|
||||
pub(crate) enum ToolEventFailure {
|
||||
Output(ExecToolCallOutput),
|
||||
Message(String),
|
||||
}
|
||||
// Concrete, allocation-free emitter: avoid trait objects and boxed futures.
|
||||
pub(crate) enum ToolEmitter {
|
||||
Shell {
|
||||
command: Vec<String>,
|
||||
cwd: PathBuf,
|
||||
},
|
||||
ApplyPatch {
|
||||
changes: HashMap<PathBuf, FileChange>,
|
||||
auto_approved: bool,
|
||||
},
|
||||
}
|
||||
|
||||
impl ToolEmitter {
|
||||
pub fn shell(command: Vec<String>, cwd: PathBuf) -> Self {
|
||||
Self::Shell { command, cwd }
|
||||
}
|
||||
|
||||
pub fn apply_patch(changes: HashMap<PathBuf, FileChange>, auto_approved: bool) -> Self {
|
||||
Self::ApplyPatch {
|
||||
changes,
|
||||
auto_approved,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn emit(&self, ctx: ToolEventCtx<'_>, stage: ToolEventStage) {
|
||||
match (self, stage) {
|
||||
(Self::Shell { command, cwd }, ToolEventStage::Begin) => {
|
||||
ctx.session
|
||||
.send_event(Event {
|
||||
id: ctx.sub_id.to_string(),
|
||||
msg: EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
|
||||
call_id: ctx.call_id.to_string(),
|
||||
command: command.clone(),
|
||||
cwd: cwd.clone(),
|
||||
parsed_cmd: parse_command(command),
|
||||
}),
|
||||
})
|
||||
.await;
|
||||
}
|
||||
(Self::Shell { .. }, ToolEventStage::Success(output)) => {
|
||||
emit_exec_end(
|
||||
ctx,
|
||||
output.stdout.text.clone(),
|
||||
output.stderr.text.clone(),
|
||||
output.aggregated_output.text.clone(),
|
||||
output.exit_code,
|
||||
output.duration,
|
||||
format_exec_output_str(&output),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
(Self::Shell { .. }, ToolEventStage::Failure(ToolEventFailure::Output(output))) => {
|
||||
emit_exec_end(
|
||||
ctx,
|
||||
output.stdout.text.clone(),
|
||||
output.stderr.text.clone(),
|
||||
output.aggregated_output.text.clone(),
|
||||
output.exit_code,
|
||||
output.duration,
|
||||
format_exec_output_str(&output),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
(Self::Shell { .. }, ToolEventStage::Failure(ToolEventFailure::Message(message))) => {
|
||||
emit_exec_end(
|
||||
ctx,
|
||||
String::new(),
|
||||
(*message).to_string(),
|
||||
(*message).to_string(),
|
||||
-1,
|
||||
Duration::ZERO,
|
||||
format_exec_output(&message),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
(
|
||||
Self::ApplyPatch {
|
||||
changes,
|
||||
auto_approved,
|
||||
},
|
||||
ToolEventStage::Begin,
|
||||
) => {
|
||||
if let Some(tracker) = ctx.turn_diff_tracker {
|
||||
let mut guard = tracker.lock().await;
|
||||
guard.on_patch_begin(changes);
|
||||
}
|
||||
ctx.session
|
||||
.send_event(Event {
|
||||
id: ctx.sub_id.to_string(),
|
||||
msg: EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
|
||||
call_id: ctx.call_id.to_string(),
|
||||
auto_approved: *auto_approved,
|
||||
changes: changes.clone(),
|
||||
}),
|
||||
})
|
||||
.await;
|
||||
}
|
||||
(Self::ApplyPatch { .. }, ToolEventStage::Success(output)) => {
|
||||
emit_patch_end(
|
||||
ctx,
|
||||
output.stdout.text.clone(),
|
||||
output.stderr.text.clone(),
|
||||
output.exit_code == 0,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
(
|
||||
Self::ApplyPatch { .. },
|
||||
ToolEventStage::Failure(ToolEventFailure::Output(output)),
|
||||
) => {
|
||||
emit_patch_end(
|
||||
ctx,
|
||||
output.stdout.text.clone(),
|
||||
output.stderr.text.clone(),
|
||||
output.exit_code == 0,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
(
|
||||
Self::ApplyPatch { .. },
|
||||
ToolEventStage::Failure(ToolEventFailure::Message(message)),
|
||||
) => {
|
||||
emit_patch_end(ctx, String::new(), (*message).to_string(), false).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn emit_exec_end(
|
||||
ctx: ToolEventCtx<'_>,
|
||||
stdout: String,
|
||||
stderr: String,
|
||||
aggregated_output: String,
|
||||
exit_code: i32,
|
||||
duration: Duration,
|
||||
formatted_output: String,
|
||||
) {
|
||||
ctx.session
|
||||
.send_event(Event {
|
||||
id: ctx.sub_id.to_string(),
|
||||
msg: EventMsg::ExecCommandEnd(ExecCommandEndEvent {
|
||||
call_id: ctx.call_id.to_string(),
|
||||
stdout,
|
||||
stderr,
|
||||
aggregated_output,
|
||||
exit_code,
|
||||
duration,
|
||||
formatted_output,
|
||||
}),
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
async fn emit_patch_end(ctx: ToolEventCtx<'_>, stdout: String, stderr: String, success: bool) {
|
||||
ctx.session
|
||||
.send_event(Event {
|
||||
id: ctx.sub_id.to_string(),
|
||||
msg: EventMsg::PatchApplyEnd(PatchApplyEndEvent {
|
||||
call_id: ctx.call_id.to_string(),
|
||||
stdout,
|
||||
stderr,
|
||||
success,
|
||||
}),
|
||||
})
|
||||
.await;
|
||||
|
||||
if let Some(tracker) = ctx.turn_diff_tracker {
|
||||
let unified_diff = {
|
||||
let mut guard = tracker.lock().await;
|
||||
guard.get_unified_diff()
|
||||
};
|
||||
if let Ok(Some(unified_diff)) = unified_diff {
|
||||
ctx.session
|
||||
.send_event(Event {
|
||||
id: ctx.sub_id.to_string(),
|
||||
msg: EventMsg::TurnDiff(TurnDiffEvent { unified_diff }),
|
||||
})
|
||||
.await;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -8,7 +8,6 @@ use crate::client_common::tools::ResponsesApiTool;
|
||||
use crate::client_common::tools::ToolSpec;
|
||||
use crate::exec::ExecParams;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::openai_tools::JsonSchema;
|
||||
use crate::tools::context::ToolInvocation;
|
||||
use crate::tools::context::ToolOutput;
|
||||
use crate::tools::context::ToolPayload;
|
||||
@@ -16,6 +15,7 @@ use crate::tools::handle_container_exec_with_params;
|
||||
use crate::tools::registry::ToolHandler;
|
||||
use crate::tools::registry::ToolKind;
|
||||
use crate::tools::spec::ApplyPatchToolArgs;
|
||||
use crate::tools::spec::JsonSchema;
|
||||
use async_trait::async_trait;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
@@ -72,6 +72,7 @@ impl ToolHandler for ApplyPatchHandler {
|
||||
env: HashMap::new(),
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
arg0: None,
|
||||
};
|
||||
|
||||
let content = handle_container_exec_with_params(
|
||||
|
||||
@@ -1,68 +0,0 @@
|
||||
use async_trait::async_trait;
|
||||
|
||||
use crate::exec_command::EXEC_COMMAND_TOOL_NAME;
|
||||
use crate::exec_command::ExecCommandParams;
|
||||
use crate::exec_command::WRITE_STDIN_TOOL_NAME;
|
||||
use crate::exec_command::WriteStdinParams;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::tools::context::ToolInvocation;
|
||||
use crate::tools::context::ToolOutput;
|
||||
use crate::tools::context::ToolPayload;
|
||||
use crate::tools::registry::ToolHandler;
|
||||
use crate::tools::registry::ToolKind;
|
||||
|
||||
pub struct ExecStreamHandler;
|
||||
|
||||
#[async_trait]
|
||||
impl ToolHandler for ExecStreamHandler {
|
||||
fn kind(&self) -> ToolKind {
|
||||
ToolKind::Function
|
||||
}
|
||||
|
||||
async fn handle(&self, invocation: ToolInvocation) -> Result<ToolOutput, FunctionCallError> {
|
||||
let ToolInvocation {
|
||||
session,
|
||||
tool_name,
|
||||
payload,
|
||||
..
|
||||
} = invocation;
|
||||
|
||||
let arguments = match payload {
|
||||
ToolPayload::Function { arguments } => arguments,
|
||||
_ => {
|
||||
return Err(FunctionCallError::RespondToModel(
|
||||
"exec_stream handler received unsupported payload".to_string(),
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
let content = match tool_name.as_str() {
|
||||
EXEC_COMMAND_TOOL_NAME => {
|
||||
let params: ExecCommandParams = serde_json::from_str(&arguments).map_err(|e| {
|
||||
FunctionCallError::RespondToModel(format!(
|
||||
"failed to parse function arguments: {e:?}"
|
||||
))
|
||||
})?;
|
||||
session.handle_exec_command_tool(params).await?
|
||||
}
|
||||
WRITE_STDIN_TOOL_NAME => {
|
||||
let params: WriteStdinParams = serde_json::from_str(&arguments).map_err(|e| {
|
||||
FunctionCallError::RespondToModel(format!(
|
||||
"failed to parse function arguments: {e:?}"
|
||||
))
|
||||
})?;
|
||||
session.handle_write_stdin_tool(params).await?
|
||||
}
|
||||
_ => {
|
||||
return Err(FunctionCallError::RespondToModel(format!(
|
||||
"exec_stream handler does not support tool {tool_name}"
|
||||
)));
|
||||
}
|
||||
};
|
||||
|
||||
Ok(ToolOutput::Function {
|
||||
content,
|
||||
success: Some(true),
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,4 @@
|
||||
pub mod apply_patch;
|
||||
mod exec_stream;
|
||||
mod grep_files;
|
||||
mod list_dir;
|
||||
mod mcp;
|
||||
@@ -14,7 +13,6 @@ mod view_image;
|
||||
pub use plan::PLAN_TOOL;
|
||||
|
||||
pub use apply_patch::ApplyPatchHandler;
|
||||
pub use exec_stream::ExecStreamHandler;
|
||||
pub use grep_files::GrepFilesHandler;
|
||||
pub use list_dir::ListDirHandler;
|
||||
pub use mcp::McpHandler;
|
||||
|
||||
@@ -2,12 +2,12 @@ use crate::client_common::tools::ResponsesApiTool;
|
||||
use crate::client_common::tools::ToolSpec;
|
||||
use crate::codex::Session;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::openai_tools::JsonSchema;
|
||||
use crate::tools::context::ToolInvocation;
|
||||
use crate::tools::context::ToolOutput;
|
||||
use crate::tools::context::ToolPayload;
|
||||
use crate::tools::registry::ToolHandler;
|
||||
use crate::tools::registry::ToolKind;
|
||||
use crate::tools::spec::JsonSchema;
|
||||
use async_trait::async_trait;
|
||||
use codex_protocol::plan_tool::UpdatePlanArgs;
|
||||
use codex_protocol::protocol::Event;
|
||||
|
||||
@@ -24,6 +24,7 @@ impl ShellHandler {
|
||||
env: create_env(&turn_context.shell_environment_policy),
|
||||
with_escalated_permissions: params.with_escalated_permissions,
|
||||
justification: params.justification,
|
||||
arg0: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,7 +35,13 @@ impl ToolHandler for UnifiedExecHandler {
|
||||
|
||||
async fn handle(&self, invocation: ToolInvocation) -> Result<ToolOutput, FunctionCallError> {
|
||||
let ToolInvocation {
|
||||
session, payload, ..
|
||||
session,
|
||||
turn,
|
||||
sub_id,
|
||||
call_id,
|
||||
tool_name: _tool_name,
|
||||
payload,
|
||||
..
|
||||
} = invocation;
|
||||
|
||||
let args = match payload {
|
||||
@@ -73,13 +79,23 @@ impl ToolHandler for UnifiedExecHandler {
|
||||
};
|
||||
|
||||
let request = UnifiedExecRequest {
|
||||
session_id: parsed_session_id,
|
||||
input_chunks: &input,
|
||||
timeout_ms,
|
||||
};
|
||||
|
||||
let value = session
|
||||
.run_unified_exec_request(request)
|
||||
.services
|
||||
.unified_exec_manager
|
||||
.handle_request(
|
||||
request,
|
||||
crate::unified_exec::UnifiedExecContext {
|
||||
session: &session,
|
||||
turn: turn.as_ref(),
|
||||
sub_id: &sub_id,
|
||||
call_id: &call_id,
|
||||
session_id: parsed_session_id,
|
||||
},
|
||||
)
|
||||
.await
|
||||
.map_err(|err| {
|
||||
FunctionCallError::RespondToModel(format!("unified exec failed: {err:?}"))
|
||||
|
||||
@@ -1,12 +1,15 @@
|
||||
pub mod context;
|
||||
pub mod events;
|
||||
pub(crate) mod handlers;
|
||||
pub mod orchestrator;
|
||||
pub mod parallel;
|
||||
pub mod registry;
|
||||
pub mod router;
|
||||
pub mod runtimes;
|
||||
pub mod sandboxing;
|
||||
pub mod spec;
|
||||
|
||||
use crate::apply_patch;
|
||||
use crate::apply_patch::ApplyPatchExec;
|
||||
use crate::apply_patch::InternalApplyPatchInvocation;
|
||||
use crate::apply_patch::convert_apply_patch_to_protocol;
|
||||
use crate::codex::Session;
|
||||
@@ -15,14 +18,19 @@ use crate::error::CodexErr;
|
||||
use crate::error::SandboxErr;
|
||||
use crate::exec::ExecParams;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::exec::StdoutStream;
|
||||
use crate::executor::ExecutionMode;
|
||||
use crate::executor::errors::ExecError;
|
||||
use crate::executor::linkers::PreparedExec;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::tools::context::ApplyPatchCommandContext;
|
||||
use crate::tools::context::ExecCommandContext;
|
||||
use crate::tools::context::SharedTurnDiffTracker;
|
||||
use crate::tools::events::ToolEmitter;
|
||||
use crate::tools::events::ToolEventCtx;
|
||||
use crate::tools::events::ToolEventFailure;
|
||||
use crate::tools::events::ToolEventStage;
|
||||
use crate::tools::orchestrator::ToolOrchestrator;
|
||||
use crate::tools::runtimes::apply_patch::ApplyPatchRequest;
|
||||
use crate::tools::runtimes::apply_patch::ApplyPatchRuntime;
|
||||
use crate::tools::runtimes::shell::ShellRequest;
|
||||
use crate::tools::runtimes::shell::ShellRuntime;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
use crate::tools::sandboxing::ToolError;
|
||||
use codex_apply_patch::MaybeApplyPatchVerified;
|
||||
use codex_apply_patch::maybe_parse_apply_patch_verified;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
@@ -56,7 +64,7 @@ pub(crate) async fn handle_container_exec_with_params(
|
||||
sub_id: String,
|
||||
call_id: String,
|
||||
) -> Result<String, FunctionCallError> {
|
||||
let otel_event_manager = turn_context.client.get_otel_event_manager();
|
||||
let _otel_event_manager = turn_context.client.get_otel_event_manager();
|
||||
|
||||
if params.with_escalated_permissions.unwrap_or(false)
|
||||
&& !matches!(turn_context.approval_policy, AskForApproval::OnRequest)
|
||||
@@ -100,86 +108,142 @@ pub(crate) async fn handle_container_exec_with_params(
|
||||
MaybeApplyPatchVerified::NotApplyPatch => None,
|
||||
};
|
||||
|
||||
let command_for_display = if let Some(exec) = apply_patch_exec.as_ref() {
|
||||
vec!["apply_patch".to_string(), exec.action.patch.clone()]
|
||||
} else {
|
||||
params.command.clone()
|
||||
};
|
||||
|
||||
let exec_command_context = ExecCommandContext {
|
||||
sub_id: sub_id.clone(),
|
||||
call_id: call_id.clone(),
|
||||
command_for_display: command_for_display.clone(),
|
||||
cwd: params.cwd.clone(),
|
||||
apply_patch: apply_patch_exec.as_ref().map(
|
||||
|ApplyPatchExec {
|
||||
action,
|
||||
user_explicitly_approved_this_action,
|
||||
}| ApplyPatchCommandContext {
|
||||
user_explicitly_approved_this_action: *user_explicitly_approved_this_action,
|
||||
changes: convert_apply_patch_to_protocol(action),
|
||||
},
|
||||
let (event_emitter, diff_opt) = match apply_patch_exec.as_ref() {
|
||||
Some(exec) => (
|
||||
ToolEmitter::apply_patch(
|
||||
convert_apply_patch_to_protocol(&exec.action),
|
||||
!exec.user_explicitly_approved_this_action,
|
||||
),
|
||||
Some(&turn_diff_tracker),
|
||||
),
|
||||
None => (
|
||||
ToolEmitter::shell(params.command.clone(), params.cwd.clone()),
|
||||
None,
|
||||
),
|
||||
tool_name: tool_name.to_string(),
|
||||
otel_event_manager,
|
||||
};
|
||||
|
||||
let mode = match apply_patch_exec {
|
||||
Some(exec) => ExecutionMode::ApplyPatch(exec),
|
||||
None => ExecutionMode::Shell,
|
||||
};
|
||||
let event_ctx = ToolEventCtx::new(sess.as_ref(), &sub_id, &call_id, diff_opt);
|
||||
event_emitter.emit(event_ctx, ToolEventStage::Begin).await;
|
||||
|
||||
sess.services.executor.update_environment(
|
||||
turn_context.sandbox_policy.clone(),
|
||||
turn_context.cwd.clone(),
|
||||
);
|
||||
// Build runtime contexts only when needed (shell/apply_patch below).
|
||||
|
||||
let prepared_exec = PreparedExec::new(
|
||||
exec_command_context,
|
||||
params,
|
||||
command_for_display,
|
||||
mode,
|
||||
Some(StdoutStream {
|
||||
if let Some(exec) = apply_patch_exec {
|
||||
// Route apply_patch execution through the new orchestrator/runtime.
|
||||
let req = ApplyPatchRequest {
|
||||
patch: exec.action.patch.clone(),
|
||||
cwd: params.cwd.clone(),
|
||||
timeout_ms: params.timeout_ms,
|
||||
user_explicitly_approved: exec.user_explicitly_approved_this_action,
|
||||
codex_exe: turn_context.codex_linux_sandbox_exe.clone(),
|
||||
};
|
||||
|
||||
let mut orchestrator = ToolOrchestrator::new();
|
||||
let mut runtime = ApplyPatchRuntime::new();
|
||||
let tool_ctx = ToolCtx {
|
||||
session: sess.as_ref(),
|
||||
sub_id: sub_id.clone(),
|
||||
call_id: call_id.clone(),
|
||||
tx_event: sess.get_tx_event(),
|
||||
}),
|
||||
turn_context.shell_environment_policy.use_profile,
|
||||
);
|
||||
tool_name: tool_name.to_string(),
|
||||
};
|
||||
|
||||
let output_result = sess
|
||||
.run_exec_with_events(
|
||||
turn_diff_tracker.clone(),
|
||||
prepared_exec,
|
||||
turn_context.approval_policy,
|
||||
)
|
||||
.await;
|
||||
let out = orchestrator
|
||||
.run(
|
||||
&mut runtime,
|
||||
&req,
|
||||
&tool_ctx,
|
||||
&turn_context,
|
||||
turn_context.approval_policy,
|
||||
)
|
||||
.await;
|
||||
|
||||
// always make sure to truncate the output if its length isn't controlled.
|
||||
match output_result {
|
||||
handle_exec_outcome(&event_emitter, event_ctx, out).await
|
||||
} else {
|
||||
// Route shell execution through the new orchestrator/runtime.
|
||||
let req = ShellRequest {
|
||||
command: params.command.clone(),
|
||||
cwd: params.cwd.clone(),
|
||||
timeout_ms: params.timeout_ms,
|
||||
env: params.env.clone(),
|
||||
with_escalated_permissions: params.with_escalated_permissions,
|
||||
justification: params.justification.clone(),
|
||||
};
|
||||
|
||||
let mut orchestrator = ToolOrchestrator::new();
|
||||
let mut runtime = ShellRuntime::new();
|
||||
let tool_ctx = ToolCtx {
|
||||
session: sess.as_ref(),
|
||||
sub_id: sub_id.clone(),
|
||||
call_id: call_id.clone(),
|
||||
tool_name: tool_name.to_string(),
|
||||
};
|
||||
|
||||
let out = orchestrator
|
||||
.run(
|
||||
&mut runtime,
|
||||
&req,
|
||||
&tool_ctx,
|
||||
&turn_context,
|
||||
turn_context.approval_policy,
|
||||
)
|
||||
.await;
|
||||
|
||||
handle_exec_outcome(&event_emitter, event_ctx, out).await
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_exec_outcome(
|
||||
event_emitter: &ToolEmitter,
|
||||
event_ctx: ToolEventCtx<'_>,
|
||||
out: Result<ExecToolCallOutput, ToolError>,
|
||||
) -> Result<String, FunctionCallError> {
|
||||
let event;
|
||||
let result = match out {
|
||||
Ok(output) => {
|
||||
let ExecToolCallOutput { exit_code, .. } = &output;
|
||||
let content = format_exec_output_apply_patch(&output);
|
||||
if *exit_code == 0 {
|
||||
let content = format_exec_output_for_model(&output);
|
||||
let exit_code = output.exit_code;
|
||||
event = ToolEventStage::Success(output);
|
||||
if exit_code == 0 {
|
||||
Ok(content)
|
||||
} else {
|
||||
Err(FunctionCallError::RespondToModel(content))
|
||||
}
|
||||
}
|
||||
Err(ExecError::Function(err)) => Err(truncate_function_error(err)),
|
||||
Err(ExecError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output }))) => Err(
|
||||
FunctionCallError::RespondToModel(format_exec_output_apply_patch(&output)),
|
||||
),
|
||||
Err(ExecError::Codex(err)) => {
|
||||
Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output })))
|
||||
| Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied { output }))) => {
|
||||
let response = format_exec_output_for_model(&output);
|
||||
event = ToolEventStage::Failure(ToolEventFailure::Output(*output));
|
||||
Err(FunctionCallError::RespondToModel(response))
|
||||
}
|
||||
Err(ToolError::Codex(err)) => {
|
||||
let message = format!("execution error: {err:?}");
|
||||
let response = format_exec_output(&message);
|
||||
event = ToolEventStage::Failure(ToolEventFailure::Message(message));
|
||||
Err(FunctionCallError::RespondToModel(format_exec_output(
|
||||
&message,
|
||||
&response,
|
||||
)))
|
||||
}
|
||||
}
|
||||
Err(ToolError::Rejected(msg)) | Err(ToolError::SandboxDenied(msg)) => {
|
||||
// Normalize common rejection messages for exec tools so tests and
|
||||
// users see a clear, consistent phrase.
|
||||
let normalized = if msg == "rejected by user" {
|
||||
"exec command rejected by user".to_string()
|
||||
} else {
|
||||
msg
|
||||
};
|
||||
let response = format_exec_output(&normalized);
|
||||
event = ToolEventStage::Failure(ToolEventFailure::Message(normalized));
|
||||
Err(FunctionCallError::RespondToModel(format_exec_output(
|
||||
&response,
|
||||
)))
|
||||
}
|
||||
};
|
||||
event_emitter.emit(event_ctx, event).await;
|
||||
result
|
||||
}
|
||||
|
||||
pub fn format_exec_output_apply_patch(exec_output: &ExecToolCallOutput) -> String {
|
||||
/// Format the combined exec output for sending back to the model.
|
||||
/// Includes exit code and duration metadata; truncates large bodies safely.
|
||||
pub fn format_exec_output_for_model(exec_output: &ExecToolCallOutput) -> String {
|
||||
let ExecToolCallOutput {
|
||||
exit_code,
|
||||
duration,
|
||||
@@ -233,18 +297,7 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
|
||||
format_exec_output(content)
|
||||
}
|
||||
|
||||
fn truncate_function_error(err: FunctionCallError) -> FunctionCallError {
|
||||
match err {
|
||||
FunctionCallError::RespondToModel(msg) => {
|
||||
FunctionCallError::RespondToModel(format_exec_output(&msg))
|
||||
}
|
||||
FunctionCallError::Denied(msg) => FunctionCallError::Denied(format_exec_output(&msg)),
|
||||
FunctionCallError::Fatal(msg) => FunctionCallError::Fatal(format_exec_output(&msg)),
|
||||
other => other,
|
||||
}
|
||||
}
|
||||
|
||||
fn format_exec_output(content: &str) -> String {
|
||||
pub(super) fn format_exec_output(content: &str) -> String {
|
||||
// Head+tail truncation for the model: show the beginning and end with an elision.
|
||||
// Clients still receive full streams; only this formatted summary is capped.
|
||||
let total_lines = content.lines().count();
|
||||
@@ -315,6 +368,17 @@ mod tests {
|
||||
use super::*;
|
||||
use regex_lite::Regex;
|
||||
|
||||
fn truncate_function_error(err: FunctionCallError) -> FunctionCallError {
|
||||
match err {
|
||||
FunctionCallError::RespondToModel(msg) => {
|
||||
FunctionCallError::RespondToModel(format_exec_output(&msg))
|
||||
}
|
||||
FunctionCallError::Denied(msg) => FunctionCallError::Denied(format_exec_output(&msg)),
|
||||
FunctionCallError::Fatal(msg) => FunctionCallError::Fatal(format_exec_output(&msg)),
|
||||
other => other,
|
||||
}
|
||||
}
|
||||
|
||||
fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usize) {
|
||||
let pattern = truncated_message_pattern(line, total_lines);
|
||||
let regex = Regex::new(&pattern).unwrap_or_else(|err| {
|
||||
|
||||
172
codex-rs/core/src/tools/orchestrator.rs
Normal file
172
codex-rs/core/src/tools/orchestrator.rs
Normal file
@@ -0,0 +1,172 @@
|
||||
/*
|
||||
Module: orchestrator
|
||||
|
||||
Central place for approvals + sandbox selection + retry semantics. Drives a
|
||||
simple sequence for any ToolRuntime: approval → select sandbox → attempt →
|
||||
retry without sandbox on denial (no re‑approval thanks to caching).
|
||||
*/
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::SandboxErr;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::sandboxing::SandboxManager;
|
||||
use crate::tools::sandboxing::ApprovalCtx;
|
||||
use crate::tools::sandboxing::SandboxAttempt;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
use crate::tools::sandboxing::ToolError;
|
||||
use crate::tools::sandboxing::ToolRuntime;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
|
||||
pub(crate) struct ToolOrchestrator {
|
||||
sandbox: SandboxManager,
|
||||
}
|
||||
|
||||
impl ToolOrchestrator {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
sandbox: SandboxManager::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn run<Rq, Out, T>(
|
||||
&mut self,
|
||||
tool: &mut T,
|
||||
req: &Rq,
|
||||
tool_ctx: &ToolCtx<'_>,
|
||||
turn_ctx: &crate::codex::TurnContext,
|
||||
approval_policy: AskForApproval,
|
||||
) -> Result<Out, ToolError>
|
||||
where
|
||||
T: ToolRuntime<Rq, Out>,
|
||||
{
|
||||
let otel = turn_ctx.client.get_otel_event_manager();
|
||||
let otel_tn = &tool_ctx.tool_name;
|
||||
let otel_ci = &tool_ctx.call_id;
|
||||
let otel_user = codex_otel::otel_event_manager::ToolDecisionSource::User;
|
||||
let otel_cfg = codex_otel::otel_event_manager::ToolDecisionSource::Config;
|
||||
|
||||
// 1) Approval
|
||||
let needs_initial_approval =
|
||||
tool.wants_initial_approval(req, approval_policy, &turn_ctx.sandbox_policy);
|
||||
let mut already_approved = false;
|
||||
|
||||
if needs_initial_approval {
|
||||
let approval_ctx = ApprovalCtx {
|
||||
session: tool_ctx.session,
|
||||
sub_id: &tool_ctx.sub_id,
|
||||
call_id: &tool_ctx.call_id,
|
||||
retry_reason: None,
|
||||
};
|
||||
let decision = tool.start_approval_async(req, approval_ctx).await;
|
||||
|
||||
otel.tool_decision(otel_tn, otel_ci, decision, otel_user.clone());
|
||||
|
||||
match decision {
|
||||
ReviewDecision::Denied | ReviewDecision::Abort => {
|
||||
return Err(ToolError::Rejected("rejected by user".to_string()));
|
||||
}
|
||||
ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {}
|
||||
}
|
||||
already_approved = true;
|
||||
} else {
|
||||
otel.tool_decision(otel_tn, otel_ci, ReviewDecision::Approved, otel_cfg);
|
||||
}
|
||||
|
||||
// 2) First attempt under the selected sandbox.
|
||||
let mut initial_sandbox = self
|
||||
.sandbox
|
||||
.select_initial(&turn_ctx.sandbox_policy, tool.sandbox_preference());
|
||||
if tool.wants_escalated_first_attempt(req) {
|
||||
initial_sandbox = crate::exec::SandboxType::None;
|
||||
}
|
||||
let initial_attempt = SandboxAttempt {
|
||||
sandbox: initial_sandbox,
|
||||
policy: &turn_ctx.sandbox_policy,
|
||||
manager: &self.sandbox,
|
||||
sandbox_cwd: &turn_ctx.cwd,
|
||||
codex_linux_sandbox_exe: turn_ctx.codex_linux_sandbox_exe.as_ref(),
|
||||
};
|
||||
|
||||
match tool.run(req, &initial_attempt, tool_ctx).await {
|
||||
Ok(out) => {
|
||||
// We have a successful initial result
|
||||
Ok(out)
|
||||
}
|
||||
Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied { output }))) => {
|
||||
if !tool.escalate_on_failure() {
|
||||
return Err(ToolError::SandboxDenied(
|
||||
"sandbox denied and no retry".to_string(),
|
||||
));
|
||||
}
|
||||
// Under `Never`, do not retry without sandbox; surface a concise message
|
||||
// derived from the actual output (platform-agnostic).
|
||||
if matches!(approval_policy, AskForApproval::Never) {
|
||||
let msg = build_never_denied_message_from_output(output.as_ref());
|
||||
return Err(ToolError::SandboxDenied(msg));
|
||||
}
|
||||
|
||||
// Ask for approval before retrying without sandbox.
|
||||
if !tool.should_bypass_approval(approval_policy, already_approved) {
|
||||
let reason_msg = build_denial_reason_from_output(output.as_ref());
|
||||
let approval_ctx = ApprovalCtx {
|
||||
session: tool_ctx.session,
|
||||
sub_id: &tool_ctx.sub_id,
|
||||
call_id: &tool_ctx.call_id,
|
||||
retry_reason: Some(reason_msg),
|
||||
};
|
||||
|
||||
let decision = tool.start_approval_async(req, approval_ctx).await;
|
||||
otel.tool_decision(otel_tn, otel_ci, decision, otel_user);
|
||||
|
||||
match decision {
|
||||
ReviewDecision::Denied | ReviewDecision::Abort => {
|
||||
return Err(ToolError::Rejected("rejected by user".to_string()));
|
||||
}
|
||||
ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {}
|
||||
}
|
||||
}
|
||||
|
||||
let escalated_attempt = SandboxAttempt {
|
||||
sandbox: crate::exec::SandboxType::None,
|
||||
policy: &turn_ctx.sandbox_policy,
|
||||
manager: &self.sandbox,
|
||||
sandbox_cwd: &turn_ctx.cwd,
|
||||
codex_linux_sandbox_exe: None,
|
||||
};
|
||||
|
||||
// Second attempt.
|
||||
(*tool).run(req, &escalated_attempt, tool_ctx).await
|
||||
}
|
||||
other => other,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn build_never_denied_message_from_output(output: &ExecToolCallOutput) -> String {
|
||||
let body = format!(
|
||||
"{}\n{}\n{}",
|
||||
output.stderr.text, output.stdout.text, output.aggregated_output.text
|
||||
)
|
||||
.to_lowercase();
|
||||
|
||||
let detail = if body.contains("permission denied") {
|
||||
Some("Permission denied")
|
||||
} else if body.contains("operation not permitted") {
|
||||
Some("Operation not permitted")
|
||||
} else if body.contains("read-only file system") {
|
||||
Some("Read-only file system")
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
match detail {
|
||||
Some(tag) => format!("failed in sandbox: {tag}"),
|
||||
None => "failed in sandbox".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
fn build_denial_reason_from_output(_output: &ExecToolCallOutput) -> String {
|
||||
// Keep approval reason terse and stable for UX/tests, but accept the
|
||||
// output so we can evolve heuristics later without touching call sites.
|
||||
"command failed; retry without sandbox?".to_string()
|
||||
}
|
||||
148
codex-rs/core/src/tools/runtimes/apply_patch.rs
Normal file
148
codex-rs/core/src/tools/runtimes/apply_patch.rs
Normal file
@@ -0,0 +1,148 @@
|
||||
//! Apply Patch runtime: executes verified patches under the orchestrator.
|
||||
//!
|
||||
//! Assumes `apply_patch` verification/approval happened upstream. Reuses that
|
||||
//! decision to avoid re-prompting, builds the self-invocation command for
|
||||
//! `codex --codex-run-as-apply-patch`, and runs under the current
|
||||
//! `SandboxAttempt` with a minimal environment.
|
||||
use crate::CODEX_APPLY_PATCH_ARG1;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::sandboxing::CommandSpec;
|
||||
use crate::sandboxing::execute_env;
|
||||
use crate::tools::sandboxing::Approvable;
|
||||
use crate::tools::sandboxing::ApprovalCtx;
|
||||
use crate::tools::sandboxing::SandboxAttempt;
|
||||
use crate::tools::sandboxing::Sandboxable;
|
||||
use crate::tools::sandboxing::SandboxablePreference;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
use crate::tools::sandboxing::ToolError;
|
||||
use crate::tools::sandboxing::ToolRuntime;
|
||||
use crate::tools::sandboxing::with_cached_approval;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use futures::future::BoxFuture;
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct ApplyPatchRequest {
|
||||
pub patch: String,
|
||||
pub cwd: PathBuf,
|
||||
pub timeout_ms: Option<u64>,
|
||||
pub user_explicitly_approved: bool,
|
||||
pub codex_exe: Option<PathBuf>,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct ApplyPatchRuntime;
|
||||
|
||||
#[derive(serde::Serialize, Clone, Debug, Eq, PartialEq, Hash)]
|
||||
pub(crate) struct ApprovalKey {
|
||||
patch: String,
|
||||
cwd: PathBuf,
|
||||
}
|
||||
|
||||
impl ApplyPatchRuntime {
|
||||
pub fn new() -> Self {
|
||||
Self
|
||||
}
|
||||
|
||||
fn build_command_spec(req: &ApplyPatchRequest) -> Result<CommandSpec, ToolError> {
|
||||
use std::env;
|
||||
let exe = if let Some(path) = &req.codex_exe {
|
||||
path.clone()
|
||||
} else {
|
||||
env::current_exe()
|
||||
.map_err(|e| ToolError::Rejected(format!("failed to determine codex exe: {e}")))?
|
||||
};
|
||||
let program = exe.to_string_lossy().to_string();
|
||||
Ok(CommandSpec {
|
||||
program,
|
||||
args: vec![CODEX_APPLY_PATCH_ARG1.to_string(), req.patch.clone()],
|
||||
cwd: req.cwd.clone(),
|
||||
timeout_ms: req.timeout_ms,
|
||||
// Run apply_patch with a minimal environment for determinism and to avoid leaks.
|
||||
env: HashMap::new(),
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
})
|
||||
}
|
||||
|
||||
fn stdout_stream(ctx: &ToolCtx<'_>) -> Option<crate::exec::StdoutStream> {
|
||||
Some(crate::exec::StdoutStream {
|
||||
sub_id: ctx.sub_id.clone(),
|
||||
call_id: ctx.call_id.clone(),
|
||||
tx_event: ctx.session.get_tx_event(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Sandboxable for ApplyPatchRuntime {
|
||||
fn sandbox_preference(&self) -> SandboxablePreference {
|
||||
SandboxablePreference::Auto
|
||||
}
|
||||
fn escalate_on_failure(&self) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
impl Approvable<ApplyPatchRequest> for ApplyPatchRuntime {
|
||||
type ApprovalKey = ApprovalKey;
|
||||
|
||||
fn approval_key(&self, req: &ApplyPatchRequest) -> Self::ApprovalKey {
|
||||
ApprovalKey {
|
||||
patch: req.patch.clone(),
|
||||
cwd: req.cwd.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
fn start_approval_async<'a>(
|
||||
&'a mut self,
|
||||
req: &'a ApplyPatchRequest,
|
||||
ctx: ApprovalCtx<'a>,
|
||||
) -> BoxFuture<'a, ReviewDecision> {
|
||||
let key = self.approval_key(req);
|
||||
let session = ctx.session;
|
||||
let sub_id = ctx.sub_id.to_string();
|
||||
let call_id = ctx.call_id.to_string();
|
||||
let cwd = req.cwd.clone();
|
||||
let retry_reason = ctx.retry_reason.clone();
|
||||
let user_explicitly_approved = req.user_explicitly_approved;
|
||||
Box::pin(async move {
|
||||
with_cached_approval(&session.services, key, || async move {
|
||||
if let Some(reason) = retry_reason {
|
||||
session
|
||||
.request_command_approval(
|
||||
sub_id,
|
||||
call_id,
|
||||
vec!["apply_patch".to_string()],
|
||||
cwd,
|
||||
Some(reason),
|
||||
)
|
||||
.await
|
||||
} else if user_explicitly_approved {
|
||||
ReviewDecision::ApprovedForSession
|
||||
} else {
|
||||
ReviewDecision::Approved
|
||||
}
|
||||
})
|
||||
.await
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl ToolRuntime<ApplyPatchRequest, ExecToolCallOutput> for ApplyPatchRuntime {
|
||||
async fn run(
|
||||
&mut self,
|
||||
req: &ApplyPatchRequest,
|
||||
attempt: &SandboxAttempt<'_>,
|
||||
ctx: &ToolCtx<'_>,
|
||||
) -> Result<ExecToolCallOutput, ToolError> {
|
||||
let spec = Self::build_command_spec(req)?;
|
||||
let env = attempt
|
||||
.env_for(&spec)
|
||||
.map_err(|err| ToolError::Codex(err.into()))?;
|
||||
let out = execute_env(&env, attempt.policy, Self::stdout_stream(ctx))
|
||||
.await
|
||||
.map_err(ToolError::Codex)?;
|
||||
Ok(out)
|
||||
}
|
||||
}
|
||||
38
codex-rs/core/src/tools/runtimes/mod.rs
Normal file
38
codex-rs/core/src/tools/runtimes/mod.rs
Normal file
@@ -0,0 +1,38 @@
|
||||
/*
|
||||
Module: runtimes
|
||||
|
||||
Concrete ToolRuntime implementations for specific tools. Each runtime stays
|
||||
small and focused and reuses the orchestrator for approvals + sandbox + retry.
|
||||
*/
|
||||
use crate::sandboxing::CommandSpec;
|
||||
use crate::tools::sandboxing::ToolError;
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
|
||||
pub mod apply_patch;
|
||||
pub mod shell;
|
||||
pub mod unified_exec;
|
||||
|
||||
/// Shared helper to construct a CommandSpec from a tokenized command line.
|
||||
/// Validates that at least a program is present.
|
||||
pub(crate) fn build_command_spec(
|
||||
command: &[String],
|
||||
cwd: &Path,
|
||||
env: &HashMap<String, String>,
|
||||
timeout_ms: Option<u64>,
|
||||
with_escalated_permissions: Option<bool>,
|
||||
justification: Option<String>,
|
||||
) -> Result<CommandSpec, ToolError> {
|
||||
let (program, args) = command
|
||||
.split_first()
|
||||
.ok_or_else(|| ToolError::Rejected("command args are empty".to_string()))?;
|
||||
Ok(CommandSpec {
|
||||
program: program.clone(),
|
||||
args: args.to_vec(),
|
||||
cwd: cwd.to_path_buf(),
|
||||
env: env.clone(),
|
||||
timeout_ms,
|
||||
with_escalated_permissions,
|
||||
justification,
|
||||
})
|
||||
}
|
||||
164
codex-rs/core/src/tools/runtimes/shell.rs
Normal file
164
codex-rs/core/src/tools/runtimes/shell.rs
Normal file
@@ -0,0 +1,164 @@
|
||||
/*
|
||||
Runtime: shell
|
||||
|
||||
Executes shell requests under the orchestrator: asks for approval when needed,
|
||||
builds a CommandSpec, and runs it under the current SandboxAttempt.
|
||||
*/
|
||||
use crate::command_safety::is_dangerous_command::command_might_be_dangerous;
|
||||
use crate::command_safety::is_safe_command::is_known_safe_command;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use crate::sandboxing::execute_env;
|
||||
use crate::tools::runtimes::build_command_spec;
|
||||
use crate::tools::sandboxing::Approvable;
|
||||
use crate::tools::sandboxing::ApprovalCtx;
|
||||
use crate::tools::sandboxing::SandboxAttempt;
|
||||
use crate::tools::sandboxing::Sandboxable;
|
||||
use crate::tools::sandboxing::SandboxablePreference;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
use crate::tools::sandboxing::ToolError;
|
||||
use crate::tools::sandboxing::ToolRuntime;
|
||||
use crate::tools::sandboxing::with_cached_approval;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use futures::future::BoxFuture;
|
||||
use std::path::PathBuf;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct ShellRequest {
|
||||
pub command: Vec<String>,
|
||||
pub cwd: PathBuf,
|
||||
pub timeout_ms: Option<u64>,
|
||||
pub env: std::collections::HashMap<String, String>,
|
||||
pub with_escalated_permissions: Option<bool>,
|
||||
pub justification: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct ShellRuntime;
|
||||
|
||||
#[derive(serde::Serialize, Clone, Debug, Eq, PartialEq, Hash)]
|
||||
pub(crate) struct ApprovalKey {
|
||||
command: Vec<String>,
|
||||
cwd: PathBuf,
|
||||
escalated: bool,
|
||||
}
|
||||
|
||||
impl ShellRuntime {
|
||||
pub fn new() -> Self {
|
||||
Self
|
||||
}
|
||||
|
||||
fn stdout_stream(ctx: &ToolCtx<'_>) -> Option<crate::exec::StdoutStream> {
|
||||
Some(crate::exec::StdoutStream {
|
||||
sub_id: ctx.sub_id.clone(),
|
||||
call_id: ctx.call_id.clone(),
|
||||
tx_event: ctx.session.get_tx_event(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Sandboxable for ShellRuntime {
|
||||
fn sandbox_preference(&self) -> SandboxablePreference {
|
||||
SandboxablePreference::Auto
|
||||
}
|
||||
fn escalate_on_failure(&self) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
impl Approvable<ShellRequest> for ShellRuntime {
|
||||
type ApprovalKey = ApprovalKey;
|
||||
|
||||
fn approval_key(&self, req: &ShellRequest) -> Self::ApprovalKey {
|
||||
ApprovalKey {
|
||||
command: req.command.clone(),
|
||||
cwd: req.cwd.clone(),
|
||||
escalated: req.with_escalated_permissions.unwrap_or(false),
|
||||
}
|
||||
}
|
||||
|
||||
fn start_approval_async<'a>(
|
||||
&'a mut self,
|
||||
req: &'a ShellRequest,
|
||||
ctx: ApprovalCtx<'a>,
|
||||
) -> BoxFuture<'a, ReviewDecision> {
|
||||
let key = self.approval_key(req);
|
||||
let command = req.command.clone();
|
||||
let cwd = req.cwd.clone();
|
||||
let reason = ctx
|
||||
.retry_reason
|
||||
.clone()
|
||||
.or_else(|| req.justification.clone());
|
||||
let session = ctx.session;
|
||||
let sub_id = ctx.sub_id.to_string();
|
||||
let call_id = ctx.call_id.to_string();
|
||||
Box::pin(async move {
|
||||
with_cached_approval(&session.services, key, || async move {
|
||||
session
|
||||
.request_command_approval(sub_id, call_id, command, cwd, reason)
|
||||
.await
|
||||
})
|
||||
.await
|
||||
})
|
||||
}
|
||||
|
||||
fn wants_initial_approval(
|
||||
&self,
|
||||
req: &ShellRequest,
|
||||
policy: AskForApproval,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
) -> bool {
|
||||
if is_known_safe_command(&req.command) {
|
||||
return false;
|
||||
}
|
||||
match policy {
|
||||
AskForApproval::Never | AskForApproval::OnFailure => false,
|
||||
AskForApproval::OnRequest => {
|
||||
// In DangerFullAccess, only prompt if the command looks dangerous.
|
||||
if matches!(sandbox_policy, SandboxPolicy::DangerFullAccess) {
|
||||
return command_might_be_dangerous(&req.command);
|
||||
}
|
||||
|
||||
// In restricted sandboxes (ReadOnly/WorkspaceWrite), do not prompt for
|
||||
// non‑escalated, non‑dangerous commands — let the sandbox enforce
|
||||
// restrictions (e.g., block network/write) without a user prompt.
|
||||
let wants_escalation = req.with_escalated_permissions.unwrap_or(false);
|
||||
if wants_escalation {
|
||||
return true;
|
||||
}
|
||||
command_might_be_dangerous(&req.command)
|
||||
}
|
||||
AskForApproval::UnlessTrusted => !is_known_safe_command(&req.command),
|
||||
}
|
||||
}
|
||||
|
||||
fn wants_escalated_first_attempt(&self, req: &ShellRequest) -> bool {
|
||||
req.with_escalated_permissions.unwrap_or(false)
|
||||
}
|
||||
}
|
||||
|
||||
impl ToolRuntime<ShellRequest, ExecToolCallOutput> for ShellRuntime {
|
||||
async fn run(
|
||||
&mut self,
|
||||
req: &ShellRequest,
|
||||
attempt: &SandboxAttempt<'_>,
|
||||
ctx: &ToolCtx<'_>,
|
||||
) -> Result<ExecToolCallOutput, ToolError> {
|
||||
let spec = build_command_spec(
|
||||
&req.command,
|
||||
&req.cwd,
|
||||
&req.env,
|
||||
req.timeout_ms,
|
||||
req.with_escalated_permissions,
|
||||
req.justification.clone(),
|
||||
)?;
|
||||
let env = attempt
|
||||
.env_for(&spec)
|
||||
.map_err(|err| ToolError::Codex(err.into()))?;
|
||||
let out = execute_env(&env, attempt.policy, Self::stdout_stream(ctx))
|
||||
.await
|
||||
.map_err(ToolError::Codex)?;
|
||||
Ok(out)
|
||||
}
|
||||
}
|
||||
123
codex-rs/core/src/tools/runtimes/unified_exec.rs
Normal file
123
codex-rs/core/src/tools/runtimes/unified_exec.rs
Normal file
@@ -0,0 +1,123 @@
|
||||
/*
|
||||
Runtime: unified exec
|
||||
|
||||
Handles approval + sandbox orchestration for unified exec requests, delegating to
|
||||
the session manager to spawn PTYs once an ExecEnv is prepared.
|
||||
*/
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::SandboxErr;
|
||||
use crate::tools::runtimes::build_command_spec;
|
||||
use crate::tools::sandboxing::Approvable;
|
||||
use crate::tools::sandboxing::ApprovalCtx;
|
||||
use crate::tools::sandboxing::SandboxAttempt;
|
||||
use crate::tools::sandboxing::Sandboxable;
|
||||
use crate::tools::sandboxing::SandboxablePreference;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
use crate::tools::sandboxing::ToolError;
|
||||
use crate::tools::sandboxing::ToolRuntime;
|
||||
use crate::tools::sandboxing::with_cached_approval;
|
||||
use crate::unified_exec::UnifiedExecError;
|
||||
use crate::unified_exec::UnifiedExecSession;
|
||||
use crate::unified_exec::UnifiedExecSessionManager;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use futures::future::BoxFuture;
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct UnifiedExecRequest {
|
||||
pub command: Vec<String>,
|
||||
pub cwd: PathBuf,
|
||||
pub env: HashMap<String, String>,
|
||||
}
|
||||
|
||||
#[derive(serde::Serialize, Clone, Debug, Eq, PartialEq, Hash)]
|
||||
pub struct UnifiedExecApprovalKey {
|
||||
pub command: Vec<String>,
|
||||
pub cwd: PathBuf,
|
||||
}
|
||||
|
||||
pub struct UnifiedExecRuntime<'a> {
|
||||
manager: &'a UnifiedExecSessionManager,
|
||||
}
|
||||
|
||||
impl UnifiedExecRequest {
|
||||
pub fn new(command: Vec<String>, cwd: PathBuf, env: HashMap<String, String>) -> Self {
|
||||
Self { command, cwd, env }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> UnifiedExecRuntime<'a> {
|
||||
pub fn new(manager: &'a UnifiedExecSessionManager) -> Self {
|
||||
Self { manager }
|
||||
}
|
||||
}
|
||||
|
||||
impl Sandboxable for UnifiedExecRuntime<'_> {
|
||||
fn sandbox_preference(&self) -> SandboxablePreference {
|
||||
SandboxablePreference::Auto
|
||||
}
|
||||
|
||||
fn escalate_on_failure(&self) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
|
||||
type ApprovalKey = UnifiedExecApprovalKey;
|
||||
|
||||
fn approval_key(&self, req: &UnifiedExecRequest) -> Self::ApprovalKey {
|
||||
UnifiedExecApprovalKey {
|
||||
command: req.command.clone(),
|
||||
cwd: req.cwd.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
fn start_approval_async<'b>(
|
||||
&'b mut self,
|
||||
req: &'b UnifiedExecRequest,
|
||||
ctx: ApprovalCtx<'b>,
|
||||
) -> BoxFuture<'b, ReviewDecision> {
|
||||
let key = self.approval_key(req);
|
||||
let session = ctx.session;
|
||||
let sub_id = ctx.sub_id.to_string();
|
||||
let call_id = ctx.call_id.to_string();
|
||||
let command = req.command.clone();
|
||||
let cwd = req.cwd.clone();
|
||||
let reason = ctx.retry_reason.clone();
|
||||
Box::pin(async move {
|
||||
with_cached_approval(&session.services, key, || async move {
|
||||
session
|
||||
.request_command_approval(sub_id, call_id, command, cwd, reason)
|
||||
.await
|
||||
})
|
||||
.await
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ToolRuntime<UnifiedExecRequest, UnifiedExecSession> for UnifiedExecRuntime<'a> {
|
||||
async fn run(
|
||||
&mut self,
|
||||
req: &UnifiedExecRequest,
|
||||
attempt: &SandboxAttempt<'_>,
|
||||
_ctx: &ToolCtx<'_>,
|
||||
) -> Result<UnifiedExecSession, ToolError> {
|
||||
let spec = build_command_spec(&req.command, &req.cwd, &req.env, None, None, None)
|
||||
.map_err(|_| ToolError::Rejected("missing command line for PTY".to_string()))?;
|
||||
let exec_env = attempt
|
||||
.env_for(&spec)
|
||||
.map_err(|err| ToolError::Codex(err.into()))?;
|
||||
self.manager
|
||||
.open_session_with_exec_env(&exec_env)
|
||||
.await
|
||||
.map_err(|err| match err {
|
||||
UnifiedExecError::SandboxDenied { output, .. } => {
|
||||
ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied {
|
||||
output: Box::new(output),
|
||||
}))
|
||||
}
|
||||
other => ToolError::Rejected(other.to_string()),
|
||||
})
|
||||
}
|
||||
}
|
||||
190
codex-rs/core/src/tools/sandboxing.rs
Normal file
190
codex-rs/core/src/tools/sandboxing.rs
Normal file
@@ -0,0 +1,190 @@
|
||||
//! Shared approvals and sandboxing traits used by tool runtimes.
|
||||
//!
|
||||
//! Consolidates the approval flow primitives (`ApprovalDecision`, `ApprovalStore`,
|
||||
//! `ApprovalCtx`, `Approvable`) together with the sandbox orchestration traits
|
||||
//! and helpers (`Sandboxable`, `ToolRuntime`, `SandboxAttempt`, etc.).
|
||||
|
||||
use crate::codex::Session;
|
||||
use crate::error::CodexErr;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use crate::sandboxing::CommandSpec;
|
||||
use crate::sandboxing::SandboxManager;
|
||||
use crate::sandboxing::SandboxTransformError;
|
||||
use crate::state::SessionServices;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::Debug;
|
||||
use std::hash::Hash;
|
||||
use std::path::Path;
|
||||
|
||||
use futures::Future;
|
||||
use futures::future::BoxFuture;
|
||||
use serde::Serialize;
|
||||
|
||||
#[derive(Clone, Default, Debug)]
|
||||
pub(crate) struct ApprovalStore {
|
||||
// Store serialized keys for generic caching across requests.
|
||||
map: HashMap<String, ReviewDecision>,
|
||||
}
|
||||
|
||||
impl ApprovalStore {
|
||||
pub fn get<K>(&self, key: &K) -> Option<ReviewDecision>
|
||||
where
|
||||
K: Serialize,
|
||||
{
|
||||
let s = serde_json::to_string(key).ok()?;
|
||||
self.map.get(&s).cloned()
|
||||
}
|
||||
|
||||
pub fn put<K>(&mut self, key: K, value: ReviewDecision)
|
||||
where
|
||||
K: Serialize,
|
||||
{
|
||||
if let Ok(s) = serde_json::to_string(&key) {
|
||||
self.map.insert(s, value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn with_cached_approval<K, F, Fut>(
|
||||
services: &SessionServices,
|
||||
key: K,
|
||||
fetch: F,
|
||||
) -> ReviewDecision
|
||||
where
|
||||
K: Serialize + Clone,
|
||||
F: FnOnce() -> Fut,
|
||||
Fut: Future<Output = ReviewDecision>,
|
||||
{
|
||||
{
|
||||
let store = services.tool_approvals.lock().await;
|
||||
if let Some(decision) = store.get(&key) {
|
||||
return decision;
|
||||
}
|
||||
}
|
||||
|
||||
let decision = fetch().await;
|
||||
|
||||
if matches!(decision, ReviewDecision::ApprovedForSession) {
|
||||
let mut store = services.tool_approvals.lock().await;
|
||||
store.put(key, ReviewDecision::ApprovedForSession);
|
||||
}
|
||||
|
||||
decision
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct ApprovalCtx<'a> {
|
||||
pub session: &'a Session,
|
||||
pub sub_id: &'a str,
|
||||
pub call_id: &'a str,
|
||||
pub retry_reason: Option<String>,
|
||||
}
|
||||
|
||||
pub(crate) trait Approvable<Req> {
|
||||
type ApprovalKey: Hash + Eq + Clone + Debug + Serialize;
|
||||
|
||||
fn approval_key(&self, req: &Req) -> Self::ApprovalKey;
|
||||
|
||||
/// Some tools may request to skip the sandbox on the first attempt
|
||||
/// (e.g., when the request explicitly asks for escalated permissions).
|
||||
/// Defaults to `false`.
|
||||
fn wants_escalated_first_attempt(&self, _req: &Req) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn should_bypass_approval(&self, policy: AskForApproval, already_approved: bool) -> bool {
|
||||
if already_approved {
|
||||
// We do not ask one more time
|
||||
return true;
|
||||
}
|
||||
matches!(policy, AskForApproval::Never)
|
||||
}
|
||||
|
||||
/// Decide whether an initial user approval should be requested before the
|
||||
/// first attempt. Defaults to the orchestrator's behavior (pre‑refactor):
|
||||
/// - Never, OnFailure: do not ask
|
||||
/// - OnRequest: ask unless sandbox policy is DangerFullAccess
|
||||
/// - UnlessTrusted: always ask
|
||||
fn wants_initial_approval(
|
||||
&self,
|
||||
_req: &Req,
|
||||
policy: AskForApproval,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
) -> bool {
|
||||
match policy {
|
||||
AskForApproval::Never | AskForApproval::OnFailure => false,
|
||||
AskForApproval::OnRequest => !matches!(sandbox_policy, SandboxPolicy::DangerFullAccess),
|
||||
AskForApproval::UnlessTrusted => true,
|
||||
}
|
||||
}
|
||||
|
||||
fn start_approval_async<'a>(
|
||||
&'a mut self,
|
||||
req: &'a Req,
|
||||
ctx: ApprovalCtx<'a>,
|
||||
) -> BoxFuture<'a, ReviewDecision>;
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub(crate) enum SandboxablePreference {
|
||||
Auto,
|
||||
#[allow(dead_code)] // Will be used by later tools.
|
||||
Require,
|
||||
#[allow(dead_code)] // Will be used by later tools.
|
||||
Forbid,
|
||||
}
|
||||
|
||||
pub(crate) trait Sandboxable {
|
||||
fn sandbox_preference(&self) -> SandboxablePreference;
|
||||
fn escalate_on_failure(&self) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct ToolCtx<'a> {
|
||||
pub session: &'a Session,
|
||||
pub sub_id: String,
|
||||
pub call_id: String,
|
||||
pub tool_name: String,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) enum ToolError {
|
||||
Rejected(String),
|
||||
SandboxDenied(String),
|
||||
Codex(CodexErr),
|
||||
}
|
||||
|
||||
pub(crate) trait ToolRuntime<Req, Out>: Approvable<Req> + Sandboxable {
|
||||
async fn run(
|
||||
&mut self,
|
||||
req: &Req,
|
||||
attempt: &SandboxAttempt<'_>,
|
||||
ctx: &ToolCtx,
|
||||
) -> Result<Out, ToolError>;
|
||||
}
|
||||
|
||||
pub(crate) struct SandboxAttempt<'a> {
|
||||
pub sandbox: crate::exec::SandboxType,
|
||||
pub policy: &'a crate::protocol::SandboxPolicy,
|
||||
pub(crate) manager: &'a SandboxManager,
|
||||
pub(crate) sandbox_cwd: &'a Path,
|
||||
pub codex_linux_sandbox_exe: Option<&'a std::path::PathBuf>,
|
||||
}
|
||||
|
||||
impl<'a> SandboxAttempt<'a> {
|
||||
pub fn env_for(
|
||||
&self,
|
||||
spec: &CommandSpec,
|
||||
) -> Result<crate::sandboxing::ExecEnv, SandboxTransformError> {
|
||||
self.manager.transform(
|
||||
spec,
|
||||
self.policy,
|
||||
self.sandbox,
|
||||
self.sandbox_cwd,
|
||||
self.codex_linux_sandbox_exe,
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -815,12 +815,7 @@ pub(crate) fn build_specs(
|
||||
config: &ToolsConfig,
|
||||
mcp_tools: Option<HashMap<String, mcp_types::Tool>>,
|
||||
) -> ToolRegistryBuilder {
|
||||
use crate::exec_command::EXEC_COMMAND_TOOL_NAME;
|
||||
use crate::exec_command::WRITE_STDIN_TOOL_NAME;
|
||||
use crate::exec_command::create_exec_command_tool_for_responses_api;
|
||||
use crate::exec_command::create_write_stdin_tool_for_responses_api;
|
||||
use crate::tools::handlers::ApplyPatchHandler;
|
||||
use crate::tools::handlers::ExecStreamHandler;
|
||||
use crate::tools::handlers::GrepFilesHandler;
|
||||
use crate::tools::handlers::ListDirHandler;
|
||||
use crate::tools::handlers::McpHandler;
|
||||
@@ -836,7 +831,6 @@ pub(crate) fn build_specs(
|
||||
let mut builder = ToolRegistryBuilder::new();
|
||||
|
||||
let shell_handler = Arc::new(ShellHandler);
|
||||
let exec_stream_handler = Arc::new(ExecStreamHandler);
|
||||
let unified_exec_handler = Arc::new(UnifiedExecHandler);
|
||||
let plan_handler = Arc::new(PlanHandler);
|
||||
let apply_patch_handler = Arc::new(ApplyPatchHandler);
|
||||
@@ -844,7 +838,10 @@ pub(crate) fn build_specs(
|
||||
let mcp_handler = Arc::new(McpHandler);
|
||||
let mcp_resource_handler = Arc::new(McpResourceHandler);
|
||||
|
||||
if config.experimental_unified_exec_tool {
|
||||
let use_unified_exec = config.experimental_unified_exec_tool
|
||||
|| matches!(config.shell_type, ConfigShellToolType::Streamable);
|
||||
|
||||
if use_unified_exec {
|
||||
builder.push_spec(create_unified_exec_tool());
|
||||
builder.register_handler("unified_exec", unified_exec_handler);
|
||||
} else {
|
||||
@@ -856,14 +853,7 @@ pub(crate) fn build_specs(
|
||||
builder.push_spec(ToolSpec::LocalShell {});
|
||||
}
|
||||
ConfigShellToolType::Streamable => {
|
||||
builder.push_spec(ToolSpec::Function(
|
||||
create_exec_command_tool_for_responses_api(),
|
||||
));
|
||||
builder.push_spec(ToolSpec::Function(
|
||||
create_write_stdin_tool_for_responses_api(),
|
||||
));
|
||||
builder.register_handler(EXEC_COMMAND_TOOL_NAME, exec_stream_handler.clone());
|
||||
builder.register_handler(WRITE_STDIN_TOOL_NAME, exec_stream_handler);
|
||||
// Already handled by use_unified_exec.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user