Support graceful agent interruption (#5287)

This commit is contained in:
pakrym-oai
2025-10-17 11:52:57 -07:00
committed by GitHub
parent 6915ba2100
commit c03e31ecf5
13 changed files with 309 additions and 55 deletions

View File

@@ -1,6 +1,7 @@
use std::sync::Arc;
use async_trait::async_trait;
use tokio_util::sync::CancellationToken;
use crate::codex::TurnContext;
use crate::codex::compact;
@@ -25,6 +26,7 @@ impl SessionTask for CompactTask {
ctx: Arc<TurnContext>,
sub_id: String,
input: Vec<InputItem>,
_cancellation_token: CancellationToken,
) -> Option<String> {
compact::run_compact_task(session.clone_session(), ctx, sub_id, input).await
}

View File

@@ -3,9 +3,15 @@ mod regular;
mod review;
use std::sync::Arc;
use std::time::Duration;
use async_trait::async_trait;
use tokio::select;
use tokio::sync::Notify;
use tokio_util::sync::CancellationToken;
use tokio_util::task::AbortOnDropHandle;
use tracing::trace;
use tracing::warn;
use crate::codex::Session;
use crate::codex::TurnContext;
@@ -23,6 +29,8 @@ pub(crate) use compact::CompactTask;
pub(crate) use regular::RegularTask;
pub(crate) use review::ReviewTask;
const GRACEFULL_INTERRUPTION_TIMEOUT_MS: u64 = 100;
/// Thin wrapper that exposes the parts of [`Session`] task runners need.
#[derive(Clone)]
pub(crate) struct SessionTaskContext {
@@ -49,6 +57,7 @@ pub(crate) trait SessionTask: Send + Sync + 'static {
ctx: Arc<TurnContext>,
sub_id: String,
input: Vec<InputItem>,
cancellation_token: CancellationToken,
) -> Option<String>;
async fn abort(&self, session: Arc<SessionTaskContext>, sub_id: &str) {
@@ -69,26 +78,42 @@ impl Session {
let task: Arc<dyn SessionTask> = Arc::new(task);
let task_kind = task.kind();
let cancellation_token = CancellationToken::new();
let done = Arc::new(Notify::new());
let done_clone = Arc::clone(&done);
let handle = {
let session_ctx = Arc::new(SessionTaskContext::new(Arc::clone(self)));
let ctx = Arc::clone(&turn_context);
let task_for_run = Arc::clone(&task);
let sub_clone = sub_id.clone();
let task_cancellation_token = cancellation_token.child_token();
tokio::spawn(async move {
let last_agent_message = task_for_run
.run(Arc::clone(&session_ctx), ctx, sub_clone.clone(), input)
.run(
Arc::clone(&session_ctx),
ctx,
sub_clone.clone(),
input,
task_cancellation_token.child_token(),
)
.await;
// Emit completion uniformly from spawn site so all tasks share the same lifecycle.
let sess = session_ctx.clone_session();
sess.on_task_finished(sub_clone, last_agent_message).await;
if !task_cancellation_token.is_cancelled() {
// Emit completion uniformly from spawn site so all tasks share the same lifecycle.
let sess = session_ctx.clone_session();
sess.on_task_finished(sub_clone, last_agent_message).await;
}
done_clone.notify_waiters();
})
.abort_handle()
};
let running_task = RunningTask {
handle,
done,
handle: Arc::new(AbortOnDropHandle::new(handle)),
kind: task_kind,
task,
cancellation_token,
};
self.register_new_active_task(sub_id, running_task).await;
}
@@ -143,14 +168,24 @@ impl Session {
task: RunningTask,
reason: TurnAbortReason,
) {
if task.handle.is_finished() {
if task.cancellation_token.is_cancelled() {
return;
}
trace!(task_kind = ?task.kind, sub_id, "aborting running task");
task.cancellation_token.cancel();
let session_task = task.task;
let handle = task.handle;
handle.abort();
select! {
_ = task.done.notified() => {
},
_ = tokio::time::sleep(Duration::from_millis(GRACEFULL_INTERRUPTION_TIMEOUT_MS)) => {
warn!("task {sub_id} didn't complete gracefully after {}ms", GRACEFULL_INTERRUPTION_TIMEOUT_MS);
}
}
task.handle.abort();
let session_ctx = Arc::new(SessionTaskContext::new(Arc::clone(self)));
session_task.abort(session_ctx, &sub_id).await;

View File

@@ -1,6 +1,7 @@
use std::sync::Arc;
use async_trait::async_trait;
use tokio_util::sync::CancellationToken;
use crate::codex::TurnContext;
use crate::codex::run_task;
@@ -25,8 +26,17 @@ impl SessionTask for RegularTask {
ctx: Arc<TurnContext>,
sub_id: String,
input: Vec<InputItem>,
cancellation_token: CancellationToken,
) -> Option<String> {
let sess = session.clone_session();
run_task(sess, ctx, sub_id, input, TaskKind::Regular).await
run_task(
sess,
ctx,
sub_id,
input,
TaskKind::Regular,
cancellation_token,
)
.await
}
}

View File

@@ -1,6 +1,7 @@
use std::sync::Arc;
use async_trait::async_trait;
use tokio_util::sync::CancellationToken;
use crate::codex::TurnContext;
use crate::codex::exit_review_mode;
@@ -26,9 +27,18 @@ impl SessionTask for ReviewTask {
ctx: Arc<TurnContext>,
sub_id: String,
input: Vec<InputItem>,
cancellation_token: CancellationToken,
) -> Option<String> {
let sess = session.clone_session();
run_task(sess, ctx, sub_id, input, TaskKind::Review).await
run_task(
sess,
ctx,
sub_id,
input,
TaskKind::Review,
cancellation_token,
)
.await
}
async fn abort(&self, session: Arc<SessionTaskContext>, sub_id: &str) {