fix: ensure PatchApplyBeginEvent and PatchApplyEndEvent are dispatched reliably (#1760)

This is a follow-up to https://github.com/openai/codex/pull/1705, as
that PR inadvertently lost the logic where `PatchApplyBeginEvent` and
`PatchApplyEndEvent` events were sent when patches were auto-approved.

Though as part of this fix, I believe this also makes an important
safety fix to `assess_patch_safety()`, as there was a case that returned
`SandboxType::None`, which arguably is the thing we were trying to avoid
in #1705.

On a high level, we want there to be only one codepath where
`apply_patch` happens, which should be unified with the patch to run
`exec`, in general, so that sandboxing is applied consistently for both
cases.

Prior to this change, `apply_patch()` in `core` would either:

* exit early, delegating to `exec()` to shell out to `apply_patch` using
the appropriate sandbox
* proceed to run the logic for `apply_patch` in memory


549846b29a/codex-rs/core/src/apply_patch.rs (L61-L63)

In this implementation, only the latter would dispatch
`PatchApplyBeginEvent` and `PatchApplyEndEvent`, though the former would
dispatch `ExecCommandBeginEvent` and `ExecCommandEndEvent` for the
`apply_patch` call (or, more specifically, the `codex
--codex-run-as-apply-patch PATCH` call).

To unify things in this PR, we:

* Eliminate the back half of the `apply_patch()` function, and instead
have it also return with `DelegateToExec`, though we add an extra field
to the return value, `user_explicitly_approved_this_action`.
* In `codex.rs` where we process `DelegateToExec`, we use
`SandboxType::None` when `user_explicitly_approved_this_action` is
`true`. This means **we no longer run the apply_patch logic in memory**,
as we always `exec()`. (Note this is what allowed us to delete so much
code in `apply_patch.rs`.)
* In `codex.rs`, we further update `notify_exec_command_begin()` and
`notify_exec_command_end()` to take additional fields to determine what
type of notification to send: `ExecCommand` or `PatchApply`.

Admittedly, this PR also drops some of the functionality about giving
the user the opportunity to expand the set of writable roots as part of
approving the `apply_patch` command. I'm not sure how much that was
used, and we should probably rethink how that works as we are currently
tidying up the protocol to the TUI, in general.
This commit is contained in:
Michael Bolin
2025-07-31 11:13:57 -07:00
committed by GitHub
parent 549846b29a
commit 06c786b2da
3 changed files with 193 additions and 388 deletions

View File

@@ -4,7 +4,6 @@
use std::borrow::Cow;
use std::collections::HashMap;
use std::collections::HashSet;
use std::path::Path;
use std::path::PathBuf;
use std::sync::Arc;
use std::sync::Mutex;
@@ -31,6 +30,7 @@ use tracing::trace;
use tracing::warn;
use uuid::Uuid;
use crate::apply_patch::ApplyPatchExec;
use crate::apply_patch::CODEX_APPLY_PATCH_ARG1;
use crate::apply_patch::InternalApplyPatchInvocation;
use crate::apply_patch::convert_apply_patch_to_protocol;
@@ -74,8 +74,11 @@ use crate::protocol::EventMsg;
use crate::protocol::ExecApprovalRequestEvent;
use crate::protocol::ExecCommandBeginEvent;
use crate::protocol::ExecCommandEndEvent;
use crate::protocol::FileChange;
use crate::protocol::InputItem;
use crate::protocol::Op;
use crate::protocol::PatchApplyBeginEvent;
use crate::protocol::PatchApplyEndEvent;
use crate::protocol::ReviewDecision;
use crate::protocol::SandboxPolicy;
use crate::protocol::SessionConfiguredEvent;
@@ -358,20 +361,32 @@ impl Session {
}
}
async fn notify_exec_command_begin(
&self,
sub_id: &str,
call_id: &str,
command_for_display: Vec<String>,
command_cwd: &Path,
) {
async fn notify_exec_command_begin(&self, exec_command_context: ExecCommandContext) {
let ExecCommandContext {
sub_id,
call_id,
command_for_display,
cwd,
apply_patch,
} = exec_command_context;
let msg = match apply_patch {
Some(ApplyPatchCommandContext {
user_explicitly_approved_this_action,
changes,
}) => EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
call_id,
auto_approved: !user_explicitly_approved_this_action,
changes,
}),
None => EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
call_id,
command: command_for_display.clone(),
cwd,
}),
};
let event = Event {
id: sub_id.to_string(),
msg: EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
call_id: call_id.to_string(),
command: command_for_display,
cwd: command_cwd.to_path_buf(),
}),
msg,
};
let _ = self.tx_event.send(event).await;
}
@@ -383,18 +398,33 @@ impl Session {
stdout: &str,
stderr: &str,
exit_code: i32,
is_apply_patch: bool,
) {
// Because stdout and stderr could each be up to 100 KiB, we send
// truncated versions.
const MAX_STREAM_OUTPUT: usize = 5 * 1024; // 5KiB
let stdout = stdout.chars().take(MAX_STREAM_OUTPUT).collect();
let stderr = stderr.chars().take(MAX_STREAM_OUTPUT).collect();
let msg = if is_apply_patch {
EventMsg::PatchApplyEnd(PatchApplyEndEvent {
call_id: call_id.to_string(),
stdout,
stderr,
success: exit_code == 0,
})
} else {
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
call_id: call_id.to_string(),
stdout,
stderr,
exit_code,
})
};
let event = Event {
id: sub_id.to_string(),
// Because stdout and stderr could each be up to 100 KiB, we send
// truncated versions.
msg: EventMsg::ExecCommandEnd(ExecCommandEndEvent {
call_id: call_id.to_string(),
stdout: stdout.chars().take(MAX_STREAM_OUTPUT).collect(),
stderr: stderr.chars().take(MAX_STREAM_OUTPUT).collect(),
exit_code,
}),
msg,
};
let _ = self.tx_event.send(event).await;
}
@@ -502,6 +532,21 @@ impl State {
}
}
#[derive(Clone, Debug)]
pub(crate) struct ExecCommandContext {
pub(crate) sub_id: String,
pub(crate) call_id: String,
pub(crate) command_for_display: Vec<String>,
pub(crate) cwd: PathBuf,
pub(crate) apply_patch: Option<ApplyPatchCommandContext>,
}
#[derive(Clone, Debug)]
pub(crate) struct ApplyPatchCommandContext {
pub(crate) user_explicitly_approved_this_action: bool,
pub(crate) changes: HashMap<PathBuf, FileChange>,
}
/// A series of Turns in response to user input.
pub(crate) struct AgentTask {
sess: Arc<Session>,
@@ -1430,35 +1475,39 @@ async fn handle_container_exec_with_params(
call_id: String,
) -> ResponseInputItem {
// check if this was a patch, and apply it if so
let apply_patch_action_for_exec =
match maybe_parse_apply_patch_verified(&params.command, &params.cwd) {
MaybeApplyPatchVerified::Body(changes) => {
match apply_patch::apply_patch(sess, &sub_id, &call_id, changes).await {
InternalApplyPatchInvocation::Output(item) => return item,
InternalApplyPatchInvocation::DelegateToExec(action) => Some(action),
let apply_patch_exec = match maybe_parse_apply_patch_verified(&params.command, &params.cwd) {
MaybeApplyPatchVerified::Body(changes) => {
match apply_patch::apply_patch(sess, &sub_id, &call_id, changes).await {
InternalApplyPatchInvocation::Output(item) => return item,
InternalApplyPatchInvocation::DelegateToExec(apply_patch_exec) => {
Some(apply_patch_exec)
}
}
MaybeApplyPatchVerified::CorrectnessError(parse_error) => {
// It looks like an invocation of `apply_patch`, but we
// could not resolve it into a patch that would apply
// cleanly. Return to model for resample.
return ResponseInputItem::FunctionCallOutput {
call_id,
output: FunctionCallOutputPayload {
content: format!("error: {parse_error:#}"),
success: None,
},
};
}
MaybeApplyPatchVerified::ShellParseError(error) => {
trace!("Failed to parse shell command, {error:?}");
None
}
MaybeApplyPatchVerified::NotApplyPatch => None,
};
}
MaybeApplyPatchVerified::CorrectnessError(parse_error) => {
// It looks like an invocation of `apply_patch`, but we
// could not resolve it into a patch that would apply
// cleanly. Return to model for resample.
return ResponseInputItem::FunctionCallOutput {
call_id,
output: FunctionCallOutputPayload {
content: format!("error: {parse_error:#}"),
success: None,
},
};
}
MaybeApplyPatchVerified::ShellParseError(error) => {
trace!("Failed to parse shell command, {error:?}");
None
}
MaybeApplyPatchVerified::NotApplyPatch => None,
};
let (params, safety, command_for_display) = match apply_patch_action_for_exec {
Some(ApplyPatchAction { patch, cwd, .. }) => {
let (params, safety, command_for_display) = match &apply_patch_exec {
Some(ApplyPatchExec {
action: ApplyPatchAction { patch, cwd, .. },
user_explicitly_approved_this_action,
}) => {
let path_to_codex = std::env::current_exe()
.ok()
.map(|p| p.to_string_lossy().to_string());
@@ -1478,13 +1527,22 @@ async fn handle_container_exec_with_params(
CODEX_APPLY_PATCH_ARG1.to_string(),
patch.clone(),
],
cwd,
cwd: cwd.clone(),
timeout_ms: params.timeout_ms,
env: HashMap::new(),
};
let safety =
assess_safety_for_untrusted_command(sess.approval_policy, &sess.sandbox_policy);
(params, safety, vec!["apply_patch".to_string(), patch])
let safety = if *user_explicitly_approved_this_action {
SafetyCheck::AutoApprove {
sandbox_type: SandboxType::None,
}
} else {
assess_safety_for_untrusted_command(sess.approval_policy, &sess.sandbox_policy)
};
(
params,
safety,
vec!["apply_patch".to_string(), patch.clone()],
)
}
None => {
let safety = {
@@ -1545,7 +1603,22 @@ async fn handle_container_exec_with_params(
}
};
sess.notify_exec_command_begin(&sub_id, &call_id, command_for_display.clone(), &params.cwd)
let exec_command_context = ExecCommandContext {
sub_id: sub_id.clone(),
call_id: call_id.clone(),
command_for_display: command_for_display.clone(),
cwd: params.cwd.clone(),
apply_patch: apply_patch_exec.map(
|ApplyPatchExec {
action,
user_explicitly_approved_this_action,
}| ApplyPatchCommandContext {
user_explicitly_approved_this_action,
changes: convert_apply_patch_to_protocol(&action),
},
),
};
sess.notify_exec_command_begin(exec_command_context.clone())
.await;
let params = maybe_run_with_user_profile(params, sess);
@@ -1567,8 +1640,15 @@ async fn handle_container_exec_with_params(
duration,
} = output;
sess.notify_exec_command_end(&sub_id, &call_id, &stdout, &stderr, exit_code)
.await;
sess.notify_exec_command_end(
&sub_id,
&call_id,
&stdout,
&stderr,
exit_code,
exec_command_context.apply_patch.is_some(),
)
.await;
let is_success = exit_code == 0;
let content = format_exec_output(
@@ -1586,16 +1666,7 @@ async fn handle_container_exec_with_params(
}
}
Err(CodexErr::Sandbox(error)) => {
handle_sandbox_error(
error,
sandbox_type,
params,
command_for_display,
sess,
sub_id,
call_id,
)
.await
handle_sandbox_error(params, exec_command_context, error, sandbox_type, sess).await
}
Err(e) => {
// Handle non-sandbox errors
@@ -1611,14 +1682,17 @@ async fn handle_container_exec_with_params(
}
async fn handle_sandbox_error(
params: ExecParams,
exec_command_context: ExecCommandContext,
error: SandboxErr,
sandbox_type: SandboxType,
params: ExecParams,
command_for_display: Vec<String>,
sess: &Session,
sub_id: String,
call_id: String,
) -> ResponseInputItem {
let call_id = exec_command_context.call_id.clone();
let sub_id = exec_command_context.sub_id.clone();
let cwd = exec_command_context.cwd.clone();
let is_apply_patch = exec_command_context.apply_patch.is_some();
// Early out if the user never wants to be asked for approval; just return to the model immediately
if sess.approval_policy == AskForApproval::Never {
return ResponseInputItem::FunctionCallOutput {
@@ -1648,7 +1722,7 @@ async fn handle_sandbox_error(
sub_id.clone(),
call_id.clone(),
params.command.clone(),
params.cwd.clone(),
cwd.clone(),
Some("command failed; retry without sandbox?".to_string()),
)
.await;
@@ -1664,8 +1738,7 @@ async fn handle_sandbox_error(
sess.notify_background_event(&sub_id, "retrying command without sandbox")
.await;
sess.notify_exec_command_begin(&sub_id, &call_id, command_for_display, &params.cwd)
.await;
sess.notify_exec_command_begin(exec_command_context).await;
// This is an escalated retry; the policy will not be
// examined and the sandbox has been set to `None`.
@@ -1687,8 +1760,15 @@ async fn handle_sandbox_error(
duration,
} = retry_output;
sess.notify_exec_command_end(&sub_id, &call_id, &stdout, &stderr, exit_code)
.await;
sess.notify_exec_command_end(
&sub_id,
&call_id,
&stdout,
&stderr,
exit_code,
is_apply_patch,
)
.await;
let is_success = exit_code == 0;
let content = format_exec_output(