Delegate review to codex instance (#5572)
In this PR, I am exploring migrating task kind to an invocation of Codex. The main reason would be getting rid off multiple `ConversationHistory` state and streamlining our context/history management. This approach depends on opening a channel between the sub-codex and codex. This channel is responsible for forwarding `interactive` (`approvals`) and `non-interactive` events. The `task` is responsible for handling those events. This opens the door for implementing `codex as a tool`, replacing `compact` and `review`, and potentially subagents. One consideration is this code is very similar to `app-server` specially in the approval part. If in the future we wanted an interactive `sub-codex` we should consider using `codex-mcp`
This commit is contained in:
@@ -61,7 +61,13 @@ pub(crate) async fn apply_patch(
|
|||||||
// that similar patches can be auto-approved in the future during
|
// that similar patches can be auto-approved in the future during
|
||||||
// this session.
|
// this session.
|
||||||
let rx_approve = sess
|
let rx_approve = sess
|
||||||
.request_patch_approval(turn_context, call_id.to_owned(), &action, None, None)
|
.request_patch_approval(
|
||||||
|
turn_context,
|
||||||
|
call_id.to_owned(),
|
||||||
|
convert_apply_patch_to_protocol(&action),
|
||||||
|
None,
|
||||||
|
None,
|
||||||
|
)
|
||||||
.await;
|
.await;
|
||||||
match rx_approve.await.unwrap_or_default() {
|
match rx_approve.await.unwrap_or_default() {
|
||||||
ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {
|
ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ use codex_protocol::models::ContentItem;
|
|||||||
use codex_protocol::models::FunctionCallOutputContentItem;
|
use codex_protocol::models::FunctionCallOutputContentItem;
|
||||||
use codex_protocol::models::ReasoningItemContent;
|
use codex_protocol::models::ReasoningItemContent;
|
||||||
use codex_protocol::models::ResponseItem;
|
use codex_protocol::models::ResponseItem;
|
||||||
|
use codex_protocol::protocol::SessionSource;
|
||||||
use eventsource_stream::Eventsource;
|
use eventsource_stream::Eventsource;
|
||||||
use futures::Stream;
|
use futures::Stream;
|
||||||
use futures::StreamExt;
|
use futures::StreamExt;
|
||||||
@@ -41,6 +42,7 @@ pub(crate) async fn stream_chat_completions(
|
|||||||
client: &CodexHttpClient,
|
client: &CodexHttpClient,
|
||||||
provider: &ModelProviderInfo,
|
provider: &ModelProviderInfo,
|
||||||
otel_event_manager: &OtelEventManager,
|
otel_event_manager: &OtelEventManager,
|
||||||
|
session_source: &SessionSource,
|
||||||
) -> Result<ResponseStream> {
|
) -> Result<ResponseStream> {
|
||||||
if prompt.output_schema.is_some() {
|
if prompt.output_schema.is_some() {
|
||||||
return Err(CodexErr::UnsupportedOperation(
|
return Err(CodexErr::UnsupportedOperation(
|
||||||
@@ -343,7 +345,15 @@ pub(crate) async fn stream_chat_completions(
|
|||||||
loop {
|
loop {
|
||||||
attempt += 1;
|
attempt += 1;
|
||||||
|
|
||||||
let req_builder = provider.create_request_builder(client, &None).await?;
|
let mut req_builder = provider.create_request_builder(client, &None).await?;
|
||||||
|
|
||||||
|
// Include session source for backend telemetry and routing.
|
||||||
|
let task_type = match serde_json::to_value(session_source) {
|
||||||
|
Ok(serde_json::Value::String(s)) => s,
|
||||||
|
Ok(other) => other.to_string(),
|
||||||
|
Err(_) => "unknown".to_string(),
|
||||||
|
};
|
||||||
|
req_builder = req_builder.header("Codex-Task-Type", task_type);
|
||||||
|
|
||||||
let res = otel_event_manager
|
let res = otel_event_manager
|
||||||
.log_request(attempt, || {
|
.log_request(attempt, || {
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ use codex_protocol::ConversationId;
|
|||||||
use codex_protocol::config_types::ReasoningEffort as ReasoningEffortConfig;
|
use codex_protocol::config_types::ReasoningEffort as ReasoningEffortConfig;
|
||||||
use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig;
|
use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig;
|
||||||
use codex_protocol::models::ResponseItem;
|
use codex_protocol::models::ResponseItem;
|
||||||
|
use codex_protocol::protocol::SessionSource;
|
||||||
use eventsource_stream::Eventsource;
|
use eventsource_stream::Eventsource;
|
||||||
use futures::prelude::*;
|
use futures::prelude::*;
|
||||||
use regex_lite::Regex;
|
use regex_lite::Regex;
|
||||||
@@ -56,7 +57,6 @@ use crate::openai_model_info::get_model_info;
|
|||||||
use crate::protocol::RateLimitSnapshot;
|
use crate::protocol::RateLimitSnapshot;
|
||||||
use crate::protocol::RateLimitWindow;
|
use crate::protocol::RateLimitWindow;
|
||||||
use crate::protocol::TokenUsage;
|
use crate::protocol::TokenUsage;
|
||||||
use crate::state::TaskKind;
|
|
||||||
use crate::token_data::PlanType;
|
use crate::token_data::PlanType;
|
||||||
use crate::tools::spec::create_tools_json_for_responses_api;
|
use crate::tools::spec::create_tools_json_for_responses_api;
|
||||||
use crate::util::backoff;
|
use crate::util::backoff;
|
||||||
@@ -87,8 +87,10 @@ pub struct ModelClient {
|
|||||||
conversation_id: ConversationId,
|
conversation_id: ConversationId,
|
||||||
effort: Option<ReasoningEffortConfig>,
|
effort: Option<ReasoningEffortConfig>,
|
||||||
summary: ReasoningSummaryConfig,
|
summary: ReasoningSummaryConfig,
|
||||||
|
session_source: SessionSource,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[allow(clippy::too_many_arguments)]
|
||||||
impl ModelClient {
|
impl ModelClient {
|
||||||
pub fn new(
|
pub fn new(
|
||||||
config: Arc<Config>,
|
config: Arc<Config>,
|
||||||
@@ -98,6 +100,7 @@ impl ModelClient {
|
|||||||
effort: Option<ReasoningEffortConfig>,
|
effort: Option<ReasoningEffortConfig>,
|
||||||
summary: ReasoningSummaryConfig,
|
summary: ReasoningSummaryConfig,
|
||||||
conversation_id: ConversationId,
|
conversation_id: ConversationId,
|
||||||
|
session_source: SessionSource,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
let client = create_client();
|
let client = create_client();
|
||||||
|
|
||||||
@@ -110,6 +113,7 @@ impl ModelClient {
|
|||||||
conversation_id,
|
conversation_id,
|
||||||
effort,
|
effort,
|
||||||
summary,
|
summary,
|
||||||
|
session_source,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -127,13 +131,6 @@ impl ModelClient {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Dispatches to either the Responses or Chat implementation depending on
|
|
||||||
/// the provider config. Public callers always invoke `stream()` – the
|
|
||||||
/// specialised helpers are private to avoid accidental misuse.
|
|
||||||
pub async fn stream(&self, prompt: &Prompt) -> Result<ResponseStream> {
|
|
||||||
self.stream_with_task_kind(prompt, TaskKind::Regular).await
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn config(&self) -> Arc<Config> {
|
pub fn config(&self) -> Arc<Config> {
|
||||||
Arc::clone(&self.config)
|
Arc::clone(&self.config)
|
||||||
}
|
}
|
||||||
@@ -142,13 +139,9 @@ impl ModelClient {
|
|||||||
&self.provider
|
&self.provider
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) async fn stream_with_task_kind(
|
pub async fn stream(&self, prompt: &Prompt) -> Result<ResponseStream> {
|
||||||
&self,
|
|
||||||
prompt: &Prompt,
|
|
||||||
task_kind: TaskKind,
|
|
||||||
) -> Result<ResponseStream> {
|
|
||||||
match self.provider.wire_api {
|
match self.provider.wire_api {
|
||||||
WireApi::Responses => self.stream_responses(prompt, task_kind).await,
|
WireApi::Responses => self.stream_responses(prompt).await,
|
||||||
WireApi::Chat => {
|
WireApi::Chat => {
|
||||||
// Create the raw streaming connection first.
|
// Create the raw streaming connection first.
|
||||||
let response_stream = stream_chat_completions(
|
let response_stream = stream_chat_completions(
|
||||||
@@ -157,6 +150,7 @@ impl ModelClient {
|
|||||||
&self.client,
|
&self.client,
|
||||||
&self.provider,
|
&self.provider,
|
||||||
&self.otel_event_manager,
|
&self.otel_event_manager,
|
||||||
|
&self.session_source,
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
@@ -189,11 +183,7 @@ impl ModelClient {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Implementation for the OpenAI *Responses* experimental API.
|
/// Implementation for the OpenAI *Responses* experimental API.
|
||||||
async fn stream_responses(
|
async fn stream_responses(&self, prompt: &Prompt) -> Result<ResponseStream> {
|
||||||
&self,
|
|
||||||
prompt: &Prompt,
|
|
||||||
task_kind: TaskKind,
|
|
||||||
) -> Result<ResponseStream> {
|
|
||||||
if let Some(path) = &*CODEX_RS_SSE_FIXTURE {
|
if let Some(path) = &*CODEX_RS_SSE_FIXTURE {
|
||||||
// short circuit for tests
|
// short circuit for tests
|
||||||
warn!(path, "Streaming from fixture");
|
warn!(path, "Streaming from fixture");
|
||||||
@@ -268,7 +258,7 @@ impl ModelClient {
|
|||||||
let max_attempts = self.provider.request_max_retries();
|
let max_attempts = self.provider.request_max_retries();
|
||||||
for attempt in 0..=max_attempts {
|
for attempt in 0..=max_attempts {
|
||||||
match self
|
match self
|
||||||
.attempt_stream_responses(attempt, &payload_json, &auth_manager, task_kind)
|
.attempt_stream_responses(attempt, &payload_json, &auth_manager)
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
Ok(stream) => {
|
Ok(stream) => {
|
||||||
@@ -296,7 +286,6 @@ impl ModelClient {
|
|||||||
attempt: u64,
|
attempt: u64,
|
||||||
payload_json: &Value,
|
payload_json: &Value,
|
||||||
auth_manager: &Option<Arc<AuthManager>>,
|
auth_manager: &Option<Arc<AuthManager>>,
|
||||||
task_kind: TaskKind,
|
|
||||||
) -> std::result::Result<ResponseStream, StreamAttemptError> {
|
) -> std::result::Result<ResponseStream, StreamAttemptError> {
|
||||||
// Always fetch the latest auth in case a prior attempt refreshed the token.
|
// Always fetch the latest auth in case a prior attempt refreshed the token.
|
||||||
let auth = auth_manager.as_ref().and_then(|m| m.auth());
|
let auth = auth_manager.as_ref().and_then(|m| m.auth());
|
||||||
@@ -314,12 +303,19 @@ impl ModelClient {
|
|||||||
.await
|
.await
|
||||||
.map_err(StreamAttemptError::Fatal)?;
|
.map_err(StreamAttemptError::Fatal)?;
|
||||||
|
|
||||||
|
// Include session source for backend telemetry and routing.
|
||||||
|
let task_type = match serde_json::to_value(&self.session_source) {
|
||||||
|
Ok(serde_json::Value::String(s)) => s,
|
||||||
|
Ok(other) => other.to_string(),
|
||||||
|
Err(_) => "unknown".to_string(),
|
||||||
|
};
|
||||||
|
req_builder = req_builder.header("Codex-Task-Type", task_type);
|
||||||
|
|
||||||
req_builder = req_builder
|
req_builder = req_builder
|
||||||
// Send session_id for compatibility.
|
// Send session_id for compatibility.
|
||||||
.header("conversation_id", self.conversation_id.to_string())
|
.header("conversation_id", self.conversation_id.to_string())
|
||||||
.header("session_id", self.conversation_id.to_string())
|
.header("session_id", self.conversation_id.to_string())
|
||||||
.header(reqwest::header::ACCEPT, "text/event-stream")
|
.header(reqwest::header::ACCEPT, "text/event-stream")
|
||||||
.header("Codex-Task-Type", task_kind.header_value())
|
|
||||||
.json(payload_json);
|
.json(payload_json);
|
||||||
|
|
||||||
if let Some(auth) = auth.as_ref()
|
if let Some(auth) = auth.as_ref()
|
||||||
@@ -462,6 +458,10 @@ impl ModelClient {
|
|||||||
self.otel_event_manager.clone()
|
self.otel_event_manager.clone()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn get_session_source(&self) -> SessionSource {
|
||||||
|
self.session_source.clone()
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns the currently configured model slug.
|
/// Returns the currently configured model slug.
|
||||||
pub fn get_model(&self) -> String {
|
pub fn get_model(&self) -> String {
|
||||||
self.config.model.clone()
|
self.config.model.clone()
|
||||||
|
|||||||
@@ -23,6 +23,11 @@ use tokio::sync::mpsc;
|
|||||||
/// Review thread system prompt. Edit `core/src/review_prompt.md` to customize.
|
/// Review thread system prompt. Edit `core/src/review_prompt.md` to customize.
|
||||||
pub const REVIEW_PROMPT: &str = include_str!("../review_prompt.md");
|
pub const REVIEW_PROMPT: &str = include_str!("../review_prompt.md");
|
||||||
|
|
||||||
|
// Centralized templates for review-related user messages
|
||||||
|
pub const REVIEW_EXIT_SUCCESS_TMPL: &str = include_str!("../templates/review/exit_success.xml");
|
||||||
|
pub const REVIEW_EXIT_INTERRUPTED_TMPL: &str =
|
||||||
|
include_str!("../templates/review/exit_interrupted.xml");
|
||||||
|
|
||||||
/// API request payload for a single model turn
|
/// API request payload for a single model turn
|
||||||
#[derive(Default, Debug, Clone)]
|
#[derive(Default, Debug, Clone)]
|
||||||
pub struct Prompt {
|
pub struct Prompt {
|
||||||
|
|||||||
@@ -6,21 +6,20 @@ use std::sync::atomic::AtomicU64;
|
|||||||
|
|
||||||
use crate::AuthManager;
|
use crate::AuthManager;
|
||||||
use crate::client_common::REVIEW_PROMPT;
|
use crate::client_common::REVIEW_PROMPT;
|
||||||
|
use crate::features::Feature;
|
||||||
use crate::function_tool::FunctionCallError;
|
use crate::function_tool::FunctionCallError;
|
||||||
use crate::mcp::auth::McpAuthStatusEntry;
|
use crate::mcp::auth::McpAuthStatusEntry;
|
||||||
use crate::mcp_connection_manager::DEFAULT_STARTUP_TIMEOUT;
|
use crate::mcp_connection_manager::DEFAULT_STARTUP_TIMEOUT;
|
||||||
use crate::parse_command::parse_command;
|
use crate::parse_command::parse_command;
|
||||||
use crate::parse_turn_item;
|
use crate::parse_turn_item;
|
||||||
use crate::response_processing::process_items;
|
use crate::response_processing::process_items;
|
||||||
use crate::review_format::format_review_findings_block;
|
|
||||||
use crate::terminal;
|
use crate::terminal;
|
||||||
use crate::user_notification::UserNotifier;
|
use crate::user_notification::UserNotifier;
|
||||||
use async_channel::Receiver;
|
use async_channel::Receiver;
|
||||||
use async_channel::Sender;
|
use async_channel::Sender;
|
||||||
use codex_apply_patch::ApplyPatchAction;
|
|
||||||
use codex_protocol::ConversationId;
|
use codex_protocol::ConversationId;
|
||||||
use codex_protocol::items::TurnItem;
|
use codex_protocol::items::TurnItem;
|
||||||
use codex_protocol::protocol::ExitedReviewModeEvent;
|
use codex_protocol::protocol::FileChange;
|
||||||
use codex_protocol::protocol::ItemCompletedEvent;
|
use codex_protocol::protocol::ItemCompletedEvent;
|
||||||
use codex_protocol::protocol::ItemStartedEvent;
|
use codex_protocol::protocol::ItemStartedEvent;
|
||||||
use codex_protocol::protocol::RawResponseItemEvent;
|
use codex_protocol::protocol::RawResponseItemEvent;
|
||||||
@@ -48,11 +47,9 @@ use tokio_util::sync::CancellationToken;
|
|||||||
use tracing::debug;
|
use tracing::debug;
|
||||||
use tracing::error;
|
use tracing::error;
|
||||||
use tracing::info;
|
use tracing::info;
|
||||||
use tracing::trace;
|
|
||||||
use tracing::warn;
|
use tracing::warn;
|
||||||
|
|
||||||
use crate::ModelProviderInfo;
|
use crate::ModelProviderInfo;
|
||||||
use crate::apply_patch::convert_apply_patch_to_protocol;
|
|
||||||
use crate::client::ModelClient;
|
use crate::client::ModelClient;
|
||||||
use crate::client_common::Prompt;
|
use crate::client_common::Prompt;
|
||||||
use crate::client_common::ResponseEvent;
|
use crate::client_common::ResponseEvent;
|
||||||
@@ -87,7 +84,6 @@ use crate::protocol::ExecApprovalRequestEvent;
|
|||||||
use crate::protocol::Op;
|
use crate::protocol::Op;
|
||||||
use crate::protocol::RateLimitSnapshot;
|
use crate::protocol::RateLimitSnapshot;
|
||||||
use crate::protocol::ReviewDecision;
|
use crate::protocol::ReviewDecision;
|
||||||
use crate::protocol::ReviewOutputEvent;
|
|
||||||
use crate::protocol::SandboxCommandAssessment;
|
use crate::protocol::SandboxCommandAssessment;
|
||||||
use crate::protocol::SandboxPolicy;
|
use crate::protocol::SandboxPolicy;
|
||||||
use crate::protocol::SessionConfiguredEvent;
|
use crate::protocol::SessionConfiguredEvent;
|
||||||
@@ -103,7 +99,6 @@ use crate::shell;
|
|||||||
use crate::state::ActiveTurn;
|
use crate::state::ActiveTurn;
|
||||||
use crate::state::SessionServices;
|
use crate::state::SessionServices;
|
||||||
use crate::state::SessionState;
|
use crate::state::SessionState;
|
||||||
use crate::state::TaskKind;
|
|
||||||
use crate::tasks::GhostSnapshotTask;
|
use crate::tasks::GhostSnapshotTask;
|
||||||
use crate::tasks::ReviewTask;
|
use crate::tasks::ReviewTask;
|
||||||
use crate::tasks::SessionTask;
|
use crate::tasks::SessionTask;
|
||||||
@@ -139,9 +134,9 @@ use self::compact::collect_user_messages;
|
|||||||
/// The high-level interface to the Codex system.
|
/// The high-level interface to the Codex system.
|
||||||
/// It operates as a queue pair where you send submissions and receive events.
|
/// It operates as a queue pair where you send submissions and receive events.
|
||||||
pub struct Codex {
|
pub struct Codex {
|
||||||
next_id: AtomicU64,
|
pub(crate) next_id: AtomicU64,
|
||||||
tx_sub: Sender<Submission>,
|
pub(crate) tx_sub: Sender<Submission>,
|
||||||
rx_event: Receiver<Event>,
|
pub(crate) rx_event: Receiver<Event>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Wrapper returned by [`Codex::spawn`] containing the spawned [`Codex`],
|
/// Wrapper returned by [`Codex::spawn`] containing the spawned [`Codex`],
|
||||||
@@ -182,16 +177,18 @@ impl Codex {
|
|||||||
cwd: config.cwd.clone(),
|
cwd: config.cwd.clone(),
|
||||||
original_config_do_not_use: Arc::clone(&config),
|
original_config_do_not_use: Arc::clone(&config),
|
||||||
features: config.features.clone(),
|
features: config.features.clone(),
|
||||||
|
session_source,
|
||||||
};
|
};
|
||||||
|
|
||||||
// Generate a unique ID for the lifetime of this Codex session.
|
// Generate a unique ID for the lifetime of this Codex session.
|
||||||
|
let session_source_clone = session_configuration.session_source.clone();
|
||||||
let session = Session::new(
|
let session = Session::new(
|
||||||
session_configuration,
|
session_configuration,
|
||||||
config.clone(),
|
config.clone(),
|
||||||
auth_manager.clone(),
|
auth_manager.clone(),
|
||||||
tx_event.clone(),
|
tx_event.clone(),
|
||||||
conversation_history,
|
conversation_history,
|
||||||
session_source,
|
session_source_clone,
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
.map_err(|e| {
|
.map_err(|e| {
|
||||||
@@ -272,7 +269,6 @@ pub(crate) struct TurnContext {
|
|||||||
pub(crate) sandbox_policy: SandboxPolicy,
|
pub(crate) sandbox_policy: SandboxPolicy,
|
||||||
pub(crate) shell_environment_policy: ShellEnvironmentPolicy,
|
pub(crate) shell_environment_policy: ShellEnvironmentPolicy,
|
||||||
pub(crate) tools_config: ToolsConfig,
|
pub(crate) tools_config: ToolsConfig,
|
||||||
pub(crate) is_review_mode: bool,
|
|
||||||
pub(crate) final_output_json_schema: Option<Value>,
|
pub(crate) final_output_json_schema: Option<Value>,
|
||||||
pub(crate) codex_linux_sandbox_exe: Option<PathBuf>,
|
pub(crate) codex_linux_sandbox_exe: Option<PathBuf>,
|
||||||
pub(crate) tool_call_gate: Arc<ReadinessFlag>,
|
pub(crate) tool_call_gate: Arc<ReadinessFlag>,
|
||||||
@@ -286,6 +282,7 @@ impl TurnContext {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[allow(dead_code)]
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub(crate) struct SessionConfiguration {
|
pub(crate) struct SessionConfiguration {
|
||||||
/// Provider identifier ("openai", "openrouter", ...).
|
/// Provider identifier ("openai", "openrouter", ...).
|
||||||
@@ -322,6 +319,8 @@ pub(crate) struct SessionConfiguration {
|
|||||||
|
|
||||||
// TODO(pakrym): Remove config from here
|
// TODO(pakrym): Remove config from here
|
||||||
original_config_do_not_use: Arc<Config>,
|
original_config_do_not_use: Arc<Config>,
|
||||||
|
/// Source of the session (cli, vscode, exec, mcp, ...)
|
||||||
|
session_source: SessionSource,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl SessionConfiguration {
|
impl SessionConfiguration {
|
||||||
@@ -394,6 +393,7 @@ impl Session {
|
|||||||
session_configuration.model_reasoning_effort,
|
session_configuration.model_reasoning_effort,
|
||||||
session_configuration.model_reasoning_summary,
|
session_configuration.model_reasoning_summary,
|
||||||
conversation_id,
|
conversation_id,
|
||||||
|
session_configuration.session_source.clone(),
|
||||||
);
|
);
|
||||||
|
|
||||||
let tools_config = ToolsConfig::new(&ToolsConfigParams {
|
let tools_config = ToolsConfig::new(&ToolsConfigParams {
|
||||||
@@ -411,7 +411,6 @@ impl Session {
|
|||||||
sandbox_policy: session_configuration.sandbox_policy.clone(),
|
sandbox_policy: session_configuration.sandbox_policy.clone(),
|
||||||
shell_environment_policy: config.shell_environment_policy.clone(),
|
shell_environment_policy: config.shell_environment_policy.clone(),
|
||||||
tools_config,
|
tools_config,
|
||||||
is_review_mode: false,
|
|
||||||
final_output_json_schema: None,
|
final_output_json_schema: None,
|
||||||
codex_linux_sandbox_exe: config.codex_linux_sandbox_exe.clone(),
|
codex_linux_sandbox_exe: config.codex_linux_sandbox_exe.clone(),
|
||||||
tool_call_gate: Arc::new(ReadinessFlag::new()),
|
tool_call_gate: Arc::new(ReadinessFlag::new()),
|
||||||
@@ -816,6 +815,7 @@ impl Session {
|
|||||||
auth_manager,
|
auth_manager,
|
||||||
&otel,
|
&otel,
|
||||||
self.conversation_id,
|
self.conversation_id,
|
||||||
|
turn_context.client.get_session_source(),
|
||||||
call_id,
|
call_id,
|
||||||
command,
|
command,
|
||||||
&turn_context.sandbox_policy,
|
&turn_context.sandbox_policy,
|
||||||
@@ -874,7 +874,7 @@ impl Session {
|
|||||||
&self,
|
&self,
|
||||||
turn_context: &TurnContext,
|
turn_context: &TurnContext,
|
||||||
call_id: String,
|
call_id: String,
|
||||||
action: &ApplyPatchAction,
|
changes: HashMap<PathBuf, FileChange>,
|
||||||
reason: Option<String>,
|
reason: Option<String>,
|
||||||
grant_root: Option<PathBuf>,
|
grant_root: Option<PathBuf>,
|
||||||
) -> oneshot::Receiver<ReviewDecision> {
|
) -> oneshot::Receiver<ReviewDecision> {
|
||||||
@@ -898,7 +898,7 @@ impl Session {
|
|||||||
|
|
||||||
let event = EventMsg::ApplyPatchApprovalRequest(ApplyPatchApprovalRequestEvent {
|
let event = EventMsg::ApplyPatchApprovalRequest(ApplyPatchApprovalRequestEvent {
|
||||||
call_id,
|
call_id,
|
||||||
changes: convert_apply_patch_to_protocol(action),
|
changes,
|
||||||
reason,
|
reason,
|
||||||
grant_root,
|
grant_root,
|
||||||
});
|
});
|
||||||
@@ -1120,18 +1120,16 @@ impl Session {
|
|||||||
turn_context: Arc<TurnContext>,
|
turn_context: Arc<TurnContext>,
|
||||||
cancellation_token: CancellationToken,
|
cancellation_token: CancellationToken,
|
||||||
) {
|
) {
|
||||||
if turn_context.is_review_mode
|
if !self
|
||||||
|| !self
|
.state
|
||||||
.state
|
.lock()
|
||||||
.lock()
|
.await
|
||||||
.await
|
.session_configuration
|
||||||
.session_configuration
|
.features
|
||||||
.features
|
.enabled(Feature::GhostCommit)
|
||||||
.enabled(Feature::GhostCommit)
|
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
let token = match turn_context.tool_call_gate.subscribe().await {
|
let token = match turn_context.tool_call_gate.subscribe().await {
|
||||||
Ok(token) => token,
|
Ok(token) => token,
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
@@ -1633,9 +1631,10 @@ async fn spawn_review_thread(
|
|||||||
.unwrap_or_else(|| parent_turn_context.client.get_model_family());
|
.unwrap_or_else(|| parent_turn_context.client.get_model_family());
|
||||||
// For reviews, disable web_search and view_image regardless of global settings.
|
// For reviews, disable web_search and view_image regardless of global settings.
|
||||||
let mut review_features = config.features.clone();
|
let mut review_features = config.features.clone();
|
||||||
review_features.disable(crate::features::Feature::WebSearchRequest);
|
review_features
|
||||||
review_features.disable(crate::features::Feature::ViewImageTool);
|
.disable(crate::features::Feature::WebSearchRequest)
|
||||||
review_features.disable(crate::features::Feature::StreamableShell);
|
.disable(crate::features::Feature::ViewImageTool)
|
||||||
|
.disable(crate::features::Feature::StreamableShell);
|
||||||
let tools_config = ToolsConfig::new(&ToolsConfigParams {
|
let tools_config = ToolsConfig::new(&ToolsConfigParams {
|
||||||
model_family: &review_model_family,
|
model_family: &review_model_family,
|
||||||
features: &review_features,
|
features: &review_features,
|
||||||
@@ -1674,6 +1673,7 @@ async fn spawn_review_thread(
|
|||||||
per_turn_config.model_reasoning_effort,
|
per_turn_config.model_reasoning_effort,
|
||||||
per_turn_config.model_reasoning_summary,
|
per_turn_config.model_reasoning_summary,
|
||||||
sess.conversation_id,
|
sess.conversation_id,
|
||||||
|
parent_turn_context.client.get_session_source(),
|
||||||
);
|
);
|
||||||
|
|
||||||
let review_turn_context = TurnContext {
|
let review_turn_context = TurnContext {
|
||||||
@@ -1686,7 +1686,6 @@ async fn spawn_review_thread(
|
|||||||
sandbox_policy: parent_turn_context.sandbox_policy.clone(),
|
sandbox_policy: parent_turn_context.sandbox_policy.clone(),
|
||||||
shell_environment_policy: parent_turn_context.shell_environment_policy.clone(),
|
shell_environment_policy: parent_turn_context.shell_environment_policy.clone(),
|
||||||
cwd: parent_turn_context.cwd.clone(),
|
cwd: parent_turn_context.cwd.clone(),
|
||||||
is_review_mode: true,
|
|
||||||
final_output_json_schema: None,
|
final_output_json_schema: None,
|
||||||
codex_linux_sandbox_exe: parent_turn_context.codex_linux_sandbox_exe.clone(),
|
codex_linux_sandbox_exe: parent_turn_context.codex_linux_sandbox_exe.clone(),
|
||||||
tool_call_gate: Arc::new(ReadinessFlag::new()),
|
tool_call_gate: Arc::new(ReadinessFlag::new()),
|
||||||
@@ -1718,14 +1717,10 @@ async fn spawn_review_thread(
|
|||||||
/// - If the model sends only an assistant message, we record it in the
|
/// - If the model sends only an assistant message, we record it in the
|
||||||
/// conversation history and consider the task complete.
|
/// conversation history and consider the task complete.
|
||||||
///
|
///
|
||||||
/// Review mode: when `turn_context.is_review_mode` is true, the turn runs in an
|
|
||||||
/// isolated in-memory thread without the parent session's prior history or
|
|
||||||
/// user_instructions. Emits ExitedReviewMode upon final review message.
|
|
||||||
pub(crate) async fn run_task(
|
pub(crate) async fn run_task(
|
||||||
sess: Arc<Session>,
|
sess: Arc<Session>,
|
||||||
turn_context: Arc<TurnContext>,
|
turn_context: Arc<TurnContext>,
|
||||||
input: Vec<UserInput>,
|
input: Vec<UserInput>,
|
||||||
task_kind: TaskKind,
|
|
||||||
cancellation_token: CancellationToken,
|
cancellation_token: CancellationToken,
|
||||||
) -> Option<String> {
|
) -> Option<String> {
|
||||||
if input.is_empty() {
|
if input.is_empty() {
|
||||||
@@ -1737,21 +1732,8 @@ pub(crate) async fn run_task(
|
|||||||
sess.send_event(&turn_context, event).await;
|
sess.send_event(&turn_context, event).await;
|
||||||
|
|
||||||
let initial_input_for_turn: ResponseInputItem = ResponseInputItem::from(input);
|
let initial_input_for_turn: ResponseInputItem = ResponseInputItem::from(input);
|
||||||
// For review threads, keep an isolated in-memory history so the
|
sess.record_input_and_rollout_usermsg(turn_context.as_ref(), &initial_input_for_turn)
|
||||||
// model sees a fresh conversation without the parent session's history.
|
.await;
|
||||||
// For normal turns, continue recording to the session history as before.
|
|
||||||
let is_review_mode = turn_context.is_review_mode;
|
|
||||||
|
|
||||||
let mut review_thread_history: ConversationHistory = ConversationHistory::new();
|
|
||||||
if is_review_mode {
|
|
||||||
// Seed review threads with environment context so the model knows the working directory.
|
|
||||||
review_thread_history
|
|
||||||
.record_items(sess.build_initial_context(turn_context.as_ref()).iter());
|
|
||||||
review_thread_history.record_items(std::iter::once(&initial_input_for_turn.into()));
|
|
||||||
} else {
|
|
||||||
sess.record_input_and_rollout_usermsg(turn_context.as_ref(), &initial_input_for_turn)
|
|
||||||
.await;
|
|
||||||
}
|
|
||||||
|
|
||||||
sess.maybe_start_ghost_snapshot(Arc::clone(&turn_context), cancellation_token.child_token())
|
sess.maybe_start_ghost_snapshot(Arc::clone(&turn_context), cancellation_token.child_token())
|
||||||
.await;
|
.await;
|
||||||
@@ -1773,21 +1755,7 @@ pub(crate) async fn run_task(
|
|||||||
.collect::<Vec<ResponseItem>>();
|
.collect::<Vec<ResponseItem>>();
|
||||||
|
|
||||||
// Construct the input that we will send to the model.
|
// Construct the input that we will send to the model.
|
||||||
//
|
let turn_input: Vec<ResponseItem> = {
|
||||||
// - For review threads, use the isolated in-memory history so the
|
|
||||||
// model sees a fresh conversation (no parent history/user_instructions).
|
|
||||||
//
|
|
||||||
// - For normal turns, use the session's full history. When using the
|
|
||||||
// chat completions API (or ZDR clients), the model needs the full
|
|
||||||
// conversation history on each turn. The rollout file, however, should
|
|
||||||
// only record the new items that originated in this turn so that it
|
|
||||||
// represents an append-only log without duplicates.
|
|
||||||
let turn_input: Vec<ResponseItem> = if is_review_mode {
|
|
||||||
if !pending_input.is_empty() {
|
|
||||||
review_thread_history.record_items(&pending_input);
|
|
||||||
}
|
|
||||||
review_thread_history.get_history_for_prompt()
|
|
||||||
} else {
|
|
||||||
sess.record_conversation_items(&turn_context, &pending_input)
|
sess.record_conversation_items(&turn_context, &pending_input)
|
||||||
.await;
|
.await;
|
||||||
sess.clone_history().await.get_history_for_prompt()
|
sess.clone_history().await.get_history_for_prompt()
|
||||||
@@ -1811,7 +1779,6 @@ pub(crate) async fn run_task(
|
|||||||
Arc::clone(&turn_context),
|
Arc::clone(&turn_context),
|
||||||
Arc::clone(&turn_diff_tracker),
|
Arc::clone(&turn_diff_tracker),
|
||||||
turn_input,
|
turn_input,
|
||||||
task_kind,
|
|
||||||
cancellation_token.child_token(),
|
cancellation_token.child_token(),
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
@@ -1831,14 +1798,8 @@ pub(crate) async fn run_task(
|
|||||||
let token_limit_reached = total_usage_tokens
|
let token_limit_reached = total_usage_tokens
|
||||||
.map(|tokens| tokens >= limit)
|
.map(|tokens| tokens >= limit)
|
||||||
.unwrap_or(false);
|
.unwrap_or(false);
|
||||||
let (responses, items_to_record_in_conversation_history) = process_items(
|
let (responses, items_to_record_in_conversation_history) =
|
||||||
processed_items,
|
process_items(processed_items, &sess, &turn_context).await;
|
||||||
is_review_mode,
|
|
||||||
&mut review_thread_history,
|
|
||||||
&sess,
|
|
||||||
&turn_context,
|
|
||||||
)
|
|
||||||
.await;
|
|
||||||
|
|
||||||
if token_limit_reached {
|
if token_limit_reached {
|
||||||
if auto_compact_recently_attempted {
|
if auto_compact_recently_attempted {
|
||||||
@@ -1880,14 +1841,7 @@ pub(crate) async fn run_task(
|
|||||||
Err(CodexErr::TurnAborted {
|
Err(CodexErr::TurnAborted {
|
||||||
dangling_artifacts: processed_items,
|
dangling_artifacts: processed_items,
|
||||||
}) => {
|
}) => {
|
||||||
let _ = process_items(
|
let _ = process_items(processed_items, &sess, &turn_context).await;
|
||||||
processed_items,
|
|
||||||
is_review_mode,
|
|
||||||
&mut review_thread_history,
|
|
||||||
&sess,
|
|
||||||
&turn_context,
|
|
||||||
)
|
|
||||||
.await;
|
|
||||||
// Aborted turn is reported via a different event.
|
// Aborted turn is reported via a different event.
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -1903,56 +1857,14 @@ pub(crate) async fn run_task(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If this was a review thread and we have a final assistant message,
|
|
||||||
// try to parse it as a ReviewOutput.
|
|
||||||
//
|
|
||||||
// If parsing fails, construct a minimal ReviewOutputEvent using the plain
|
|
||||||
// text as the overall explanation. Else, just exit review mode with None.
|
|
||||||
//
|
|
||||||
// Emits an ExitedReviewMode event with the parsed review output.
|
|
||||||
if turn_context.is_review_mode {
|
|
||||||
exit_review_mode(
|
|
||||||
sess.clone(),
|
|
||||||
Arc::clone(&turn_context),
|
|
||||||
last_agent_message.as_deref().map(parse_review_output_event),
|
|
||||||
)
|
|
||||||
.await;
|
|
||||||
}
|
|
||||||
|
|
||||||
last_agent_message
|
last_agent_message
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse the review output; when not valid JSON, build a structured
|
|
||||||
/// fallback that carries the plain text as the overall explanation.
|
|
||||||
///
|
|
||||||
/// Returns: a ReviewOutputEvent parsed from JSON or a fallback populated from text.
|
|
||||||
fn parse_review_output_event(text: &str) -> ReviewOutputEvent {
|
|
||||||
// Try direct parse first
|
|
||||||
if let Ok(ev) = serde_json::from_str::<ReviewOutputEvent>(text) {
|
|
||||||
return ev;
|
|
||||||
}
|
|
||||||
// If wrapped in markdown fences or extra prose, attempt to extract the first JSON object
|
|
||||||
if let (Some(start), Some(end)) = (text.find('{'), text.rfind('}'))
|
|
||||||
&& start < end
|
|
||||||
&& let Some(slice) = text.get(start..=end)
|
|
||||||
&& let Ok(ev) = serde_json::from_str::<ReviewOutputEvent>(slice)
|
|
||||||
{
|
|
||||||
return ev;
|
|
||||||
}
|
|
||||||
// Not JSON – return a structured ReviewOutputEvent that carries
|
|
||||||
// the plain text as the overall explanation.
|
|
||||||
ReviewOutputEvent {
|
|
||||||
overall_explanation: text.to_string(),
|
|
||||||
..Default::default()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn run_turn(
|
async fn run_turn(
|
||||||
sess: Arc<Session>,
|
sess: Arc<Session>,
|
||||||
turn_context: Arc<TurnContext>,
|
turn_context: Arc<TurnContext>,
|
||||||
turn_diff_tracker: SharedTurnDiffTracker,
|
turn_diff_tracker: SharedTurnDiffTracker,
|
||||||
input: Vec<ResponseItem>,
|
input: Vec<ResponseItem>,
|
||||||
task_kind: TaskKind,
|
|
||||||
cancellation_token: CancellationToken,
|
cancellation_token: CancellationToken,
|
||||||
) -> CodexResult<TurnRunResult> {
|
) -> CodexResult<TurnRunResult> {
|
||||||
let mcp_tools = sess.services.mcp_connection_manager.list_all_tools();
|
let mcp_tools = sess.services.mcp_connection_manager.list_all_tools();
|
||||||
@@ -1982,7 +1894,6 @@ async fn run_turn(
|
|||||||
Arc::clone(&turn_context),
|
Arc::clone(&turn_context),
|
||||||
Arc::clone(&turn_diff_tracker),
|
Arc::clone(&turn_diff_tracker),
|
||||||
&prompt,
|
&prompt,
|
||||||
task_kind,
|
|
||||||
cancellation_token.child_token(),
|
cancellation_token.child_token(),
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
@@ -2065,7 +1976,6 @@ async fn try_run_turn(
|
|||||||
turn_context: Arc<TurnContext>,
|
turn_context: Arc<TurnContext>,
|
||||||
turn_diff_tracker: SharedTurnDiffTracker,
|
turn_diff_tracker: SharedTurnDiffTracker,
|
||||||
prompt: &Prompt,
|
prompt: &Prompt,
|
||||||
task_kind: TaskKind,
|
|
||||||
cancellation_token: CancellationToken,
|
cancellation_token: CancellationToken,
|
||||||
) -> CodexResult<TurnRunResult> {
|
) -> CodexResult<TurnRunResult> {
|
||||||
let rollout_item = RolloutItem::TurnContext(TurnContextItem {
|
let rollout_item = RolloutItem::TurnContext(TurnContextItem {
|
||||||
@@ -2081,7 +1991,7 @@ async fn try_run_turn(
|
|||||||
let mut stream = turn_context
|
let mut stream = turn_context
|
||||||
.client
|
.client
|
||||||
.clone()
|
.clone()
|
||||||
.stream_with_task_kind(prompt, task_kind)
|
.stream(prompt)
|
||||||
.or_cancel(&cancellation_token)
|
.or_cancel(&cancellation_token)
|
||||||
.await??;
|
.await??;
|
||||||
|
|
||||||
@@ -2231,14 +2141,8 @@ async fn try_run_turn(
|
|||||||
return Ok(result);
|
return Ok(result);
|
||||||
}
|
}
|
||||||
ResponseEvent::OutputTextDelta(delta) => {
|
ResponseEvent::OutputTextDelta(delta) => {
|
||||||
// In review child threads, suppress assistant text deltas; the
|
let event = EventMsg::AgentMessageDelta(AgentMessageDeltaEvent { delta });
|
||||||
// UI will show a selection popup from the final ReviewOutput.
|
sess.send_event(&turn_context, event).await;
|
||||||
if !turn_context.is_review_mode {
|
|
||||||
let event = EventMsg::AgentMessageDelta(AgentMessageDeltaEvent { delta });
|
|
||||||
sess.send_event(&turn_context, event).await;
|
|
||||||
} else {
|
|
||||||
trace!("suppressing OutputTextDelta in review mode");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
ResponseEvent::ReasoningSummaryDelta(delta) => {
|
ResponseEvent::ReasoningSummaryDelta(delta) => {
|
||||||
let event = EventMsg::AgentReasoningDelta(AgentReasoningDeltaEvent { delta });
|
let event = EventMsg::AgentReasoningDelta(AgentReasoningDeltaEvent { delta });
|
||||||
@@ -2273,13 +2177,7 @@ async fn handle_non_tool_response_item(
|
|||||||
ResponseItem::Message { .. }
|
ResponseItem::Message { .. }
|
||||||
| ResponseItem::Reasoning { .. }
|
| ResponseItem::Reasoning { .. }
|
||||||
| ResponseItem::WebSearchCall { .. } => {
|
| ResponseItem::WebSearchCall { .. } => {
|
||||||
let turn_item = match &item {
|
let turn_item = parse_turn_item(&item);
|
||||||
ResponseItem::Message { .. } if turn_context.is_review_mode => {
|
|
||||||
trace!("suppressing assistant Message in review mode");
|
|
||||||
None
|
|
||||||
}
|
|
||||||
_ => parse_turn_item(&item),
|
|
||||||
};
|
|
||||||
if let Some(turn_item) = turn_item {
|
if let Some(turn_item) = turn_item {
|
||||||
sess.emit_turn_item_started_completed(
|
sess.emit_turn_item_started_completed(
|
||||||
turn_context.as_ref(),
|
turn_context.as_ref(),
|
||||||
@@ -2317,62 +2215,6 @@ pub(super) fn get_last_assistant_message_from_turn(responses: &[ResponseItem]) -
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
/// Emits an ExitedReviewMode Event with optional ReviewOutput,
|
|
||||||
/// and records a developer message with the review output.
|
|
||||||
pub(crate) async fn exit_review_mode(
|
|
||||||
session: Arc<Session>,
|
|
||||||
turn_context: Arc<TurnContext>,
|
|
||||||
review_output: Option<ReviewOutputEvent>,
|
|
||||||
) {
|
|
||||||
let event = EventMsg::ExitedReviewMode(ExitedReviewModeEvent {
|
|
||||||
review_output: review_output.clone(),
|
|
||||||
});
|
|
||||||
session.send_event(turn_context.as_ref(), event).await;
|
|
||||||
|
|
||||||
let mut user_message = String::new();
|
|
||||||
if let Some(out) = review_output {
|
|
||||||
let mut findings_str = String::new();
|
|
||||||
let text = out.overall_explanation.trim();
|
|
||||||
if !text.is_empty() {
|
|
||||||
findings_str.push_str(text);
|
|
||||||
}
|
|
||||||
if !out.findings.is_empty() {
|
|
||||||
let block = format_review_findings_block(&out.findings, None);
|
|
||||||
findings_str.push_str(&format!("\n{block}"));
|
|
||||||
}
|
|
||||||
user_message.push_str(&format!(
|
|
||||||
r#"<user_action>
|
|
||||||
<context>User initiated a review task. Here's the full review output from reviewer model. User may select one or more comments to resolve.</context>
|
|
||||||
<action>review</action>
|
|
||||||
<results>
|
|
||||||
{findings_str}
|
|
||||||
</results>
|
|
||||||
</user_action>
|
|
||||||
"#));
|
|
||||||
} else {
|
|
||||||
user_message.push_str(r#"<user_action>
|
|
||||||
<context>User initiated a review task, but was interrupted. If user asks about this, tell them to re-initiate a review with `/review` and wait for it to complete.</context>
|
|
||||||
<action>review</action>
|
|
||||||
<results>
|
|
||||||
None.
|
|
||||||
</results>
|
|
||||||
</user_action>
|
|
||||||
"#);
|
|
||||||
}
|
|
||||||
|
|
||||||
session
|
|
||||||
.record_conversation_items(
|
|
||||||
&turn_context,
|
|
||||||
&[ResponseItem::Message {
|
|
||||||
id: None,
|
|
||||||
role: "user".to_string(),
|
|
||||||
content: vec![ContentItem::InputText { text: user_message }],
|
|
||||||
}],
|
|
||||||
)
|
|
||||||
.await;
|
|
||||||
// Make the recorded review note visible immediately for readers.
|
|
||||||
session.flush_rollout().await;
|
|
||||||
}
|
|
||||||
|
|
||||||
fn mcp_init_error_display(
|
fn mcp_init_error_display(
|
||||||
server_name: &str,
|
server_name: &str,
|
||||||
@@ -2427,7 +2269,6 @@ fn is_mcp_client_startup_timeout_error(error: &anyhow::Error) -> bool {
|
|||||||
|| error_message.contains("timed out handshaking with MCP server")
|
|| error_message.contains("timed out handshaking with MCP server")
|
||||||
}
|
}
|
||||||
|
|
||||||
use crate::features::Feature;
|
|
||||||
use crate::features::Features;
|
use crate::features::Features;
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
pub(crate) use tests::make_session_and_context;
|
pub(crate) use tests::make_session_and_context;
|
||||||
@@ -2665,6 +2506,7 @@ mod tests {
|
|||||||
cwd: config.cwd.clone(),
|
cwd: config.cwd.clone(),
|
||||||
original_config_do_not_use: Arc::clone(&config),
|
original_config_do_not_use: Arc::clone(&config),
|
||||||
features: Features::default(),
|
features: Features::default(),
|
||||||
|
session_source: SessionSource::Exec,
|
||||||
};
|
};
|
||||||
|
|
||||||
let state = SessionState::new(session_configuration.clone());
|
let state = SessionState::new(session_configuration.clone());
|
||||||
@@ -2738,6 +2580,7 @@ mod tests {
|
|||||||
cwd: config.cwd.clone(),
|
cwd: config.cwd.clone(),
|
||||||
original_config_do_not_use: Arc::clone(&config),
|
original_config_do_not_use: Arc::clone(&config),
|
||||||
features: Features::default(),
|
features: Features::default(),
|
||||||
|
session_source: SessionSource::Exec,
|
||||||
};
|
};
|
||||||
|
|
||||||
let state = SessionState::new(session_configuration.clone());
|
let state = SessionState::new(session_configuration.clone());
|
||||||
@@ -2802,12 +2645,6 @@ mod tests {
|
|||||||
sleep(Duration::from_secs(60)).await;
|
sleep(Duration::from_secs(60)).await;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn abort(&self, session: Arc<SessionTaskContext>, ctx: Arc<TurnContext>) {
|
|
||||||
if let TaskKind::Review = self.kind {
|
|
||||||
exit_review_mode(session.clone_session(), ctx, None).await;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||||
@@ -2872,25 +2709,25 @@ mod tests {
|
|||||||
let input = vec![UserInput::Text {
|
let input = vec![UserInput::Text {
|
||||||
text: "start review".to_string(),
|
text: "start review".to_string(),
|
||||||
}];
|
}];
|
||||||
sess.spawn_task(
|
sess.spawn_task(Arc::clone(&tc), input, ReviewTask).await;
|
||||||
Arc::clone(&tc),
|
|
||||||
input,
|
|
||||||
NeverEndingTask {
|
|
||||||
kind: TaskKind::Review,
|
|
||||||
listen_to_cancellation_token: false,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
.await;
|
|
||||||
|
|
||||||
sess.abort_all_tasks(TurnAbortReason::Interrupted).await;
|
sess.abort_all_tasks(TurnAbortReason::Interrupted).await;
|
||||||
|
|
||||||
let first = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv())
|
// Drain events until we observe ExitedReviewMode; earlier
|
||||||
.await
|
// RawResponseItem entries (e.g., environment context) may arrive first.
|
||||||
.expect("timeout waiting for first event")
|
loop {
|
||||||
.expect("first event");
|
let evt = tokio::time::timeout(std::time::Duration::from_secs(1), rx.recv())
|
||||||
match first.msg {
|
.await
|
||||||
EventMsg::ExitedReviewMode(ev) => assert!(ev.review_output.is_none()),
|
.expect("timeout waiting for first event")
|
||||||
other => panic!("unexpected first event: {other:?}"),
|
.expect("first event");
|
||||||
|
match evt.msg {
|
||||||
|
EventMsg::ExitedReviewMode(ev) => {
|
||||||
|
assert!(ev.review_output.is_none());
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// Ignore any non-critical events before exit.
|
||||||
|
_ => continue,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
loop {
|
loop {
|
||||||
let evt = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv())
|
let evt = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv())
|
||||||
|
|||||||
@@ -13,7 +13,6 @@ use crate::protocol::ErrorEvent;
|
|||||||
use crate::protocol::EventMsg;
|
use crate::protocol::EventMsg;
|
||||||
use crate::protocol::TaskStartedEvent;
|
use crate::protocol::TaskStartedEvent;
|
||||||
use crate::protocol::TurnContextItem;
|
use crate::protocol::TurnContextItem;
|
||||||
use crate::state::TaskKind;
|
|
||||||
use crate::truncate::truncate_middle;
|
use crate::truncate::truncate_middle;
|
||||||
use crate::util::backoff;
|
use crate::util::backoff;
|
||||||
use askama::Template;
|
use askama::Template;
|
||||||
@@ -255,11 +254,7 @@ async fn drain_to_completed(
|
|||||||
turn_context: &TurnContext,
|
turn_context: &TurnContext,
|
||||||
prompt: &Prompt,
|
prompt: &Prompt,
|
||||||
) -> CodexResult<()> {
|
) -> CodexResult<()> {
|
||||||
let mut stream = turn_context
|
let mut stream = turn_context.client.clone().stream(prompt).await?;
|
||||||
.client
|
|
||||||
.clone()
|
|
||||||
.stream_with_task_kind(prompt, TaskKind::Compact)
|
|
||||||
.await?;
|
|
||||||
loop {
|
loop {
|
||||||
let maybe_event = stream.next().await;
|
let maybe_event = stream.next().await;
|
||||||
let Some(event) = maybe_event else {
|
let Some(event) = maybe_event else {
|
||||||
|
|||||||
292
codex-rs/core/src/codex_delegate.rs
Normal file
292
codex-rs/core/src/codex_delegate.rs
Normal file
@@ -0,0 +1,292 @@
|
|||||||
|
use std::sync::Arc;
|
||||||
|
use std::sync::atomic::AtomicU64;
|
||||||
|
|
||||||
|
use async_channel::Receiver;
|
||||||
|
use async_channel::Sender;
|
||||||
|
use codex_async_utils::OrCancelExt;
|
||||||
|
use codex_protocol::protocol::ApplyPatchApprovalRequestEvent;
|
||||||
|
use codex_protocol::protocol::Event;
|
||||||
|
use codex_protocol::protocol::EventMsg;
|
||||||
|
use codex_protocol::protocol::ExecApprovalRequestEvent;
|
||||||
|
use codex_protocol::protocol::Op;
|
||||||
|
use codex_protocol::protocol::SessionSource;
|
||||||
|
use codex_protocol::protocol::SubAgentSource;
|
||||||
|
use codex_protocol::protocol::Submission;
|
||||||
|
use codex_protocol::user_input::UserInput;
|
||||||
|
use tokio_util::sync::CancellationToken;
|
||||||
|
|
||||||
|
use crate::AuthManager;
|
||||||
|
use crate::codex::Codex;
|
||||||
|
use crate::codex::CodexSpawnOk;
|
||||||
|
use crate::codex::SUBMISSION_CHANNEL_CAPACITY;
|
||||||
|
use crate::codex::Session;
|
||||||
|
use crate::codex::TurnContext;
|
||||||
|
use crate::config::Config;
|
||||||
|
use crate::error::CodexErr;
|
||||||
|
use codex_protocol::protocol::InitialHistory;
|
||||||
|
|
||||||
|
/// Start an interactive sub-Codex conversation and return IO channels.
|
||||||
|
///
|
||||||
|
/// The returned `events_rx` yields non-approval events emitted by the sub-agent.
|
||||||
|
/// Approval requests are handled via `parent_session` and are not surfaced.
|
||||||
|
/// The returned `ops_tx` allows the caller to submit additional `Op`s to the sub-agent.
|
||||||
|
pub(crate) async fn run_codex_conversation_interactive(
|
||||||
|
config: Config,
|
||||||
|
auth_manager: Arc<AuthManager>,
|
||||||
|
parent_session: Arc<Session>,
|
||||||
|
parent_ctx: Arc<TurnContext>,
|
||||||
|
cancel_token: CancellationToken,
|
||||||
|
) -> Result<Codex, CodexErr> {
|
||||||
|
let (tx_sub, rx_sub) = async_channel::bounded(SUBMISSION_CHANNEL_CAPACITY);
|
||||||
|
let (tx_ops, rx_ops) = async_channel::bounded(SUBMISSION_CHANNEL_CAPACITY);
|
||||||
|
|
||||||
|
let CodexSpawnOk { codex, .. } = Codex::spawn(
|
||||||
|
config,
|
||||||
|
auth_manager,
|
||||||
|
InitialHistory::New,
|
||||||
|
SessionSource::SubAgent(SubAgentSource::Review),
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
let codex = Arc::new(codex);
|
||||||
|
|
||||||
|
// Use a child token so parent cancel cascades but we can scope it to this task
|
||||||
|
let cancel_token_events = cancel_token.child_token();
|
||||||
|
let cancel_token_ops = cancel_token.child_token();
|
||||||
|
|
||||||
|
// Forward events from the sub-agent to the consumer, filtering approvals and
|
||||||
|
// routing them to the parent session for decisions.
|
||||||
|
let parent_session_clone = Arc::clone(&parent_session);
|
||||||
|
let parent_ctx_clone = Arc::clone(&parent_ctx);
|
||||||
|
let codex_for_events = Arc::clone(&codex);
|
||||||
|
tokio::spawn(async move {
|
||||||
|
let _ = forward_events(
|
||||||
|
codex_for_events,
|
||||||
|
tx_sub,
|
||||||
|
parent_session_clone,
|
||||||
|
parent_ctx_clone,
|
||||||
|
cancel_token_events.clone(),
|
||||||
|
)
|
||||||
|
.or_cancel(&cancel_token_events)
|
||||||
|
.await;
|
||||||
|
});
|
||||||
|
|
||||||
|
// Forward ops from the caller to the sub-agent.
|
||||||
|
let codex_for_ops = Arc::clone(&codex);
|
||||||
|
tokio::spawn(async move {
|
||||||
|
forward_ops(codex_for_ops, rx_ops, cancel_token_ops).await;
|
||||||
|
});
|
||||||
|
|
||||||
|
Ok(Codex {
|
||||||
|
next_id: AtomicU64::new(0),
|
||||||
|
tx_sub: tx_ops,
|
||||||
|
rx_event: rx_sub,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Convenience wrapper for one-time use with an initial prompt.
|
||||||
|
///
|
||||||
|
/// Internally calls the interactive variant, then immediately submits the provided input.
|
||||||
|
pub(crate) async fn run_codex_conversation_one_shot(
|
||||||
|
config: Config,
|
||||||
|
auth_manager: Arc<AuthManager>,
|
||||||
|
input: Vec<UserInput>,
|
||||||
|
parent_session: Arc<Session>,
|
||||||
|
parent_ctx: Arc<TurnContext>,
|
||||||
|
cancel_token: CancellationToken,
|
||||||
|
) -> Result<Codex, CodexErr> {
|
||||||
|
// Use a child token so we can stop the delegate after completion without
|
||||||
|
// requiring the caller to cancel the parent token.
|
||||||
|
let child_cancel = cancel_token.child_token();
|
||||||
|
let io = run_codex_conversation_interactive(
|
||||||
|
config,
|
||||||
|
auth_manager,
|
||||||
|
parent_session,
|
||||||
|
parent_ctx,
|
||||||
|
child_cancel.clone(),
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
// Send the initial input to kick off the one-shot turn.
|
||||||
|
io.submit(Op::UserInput { items: input }).await?;
|
||||||
|
|
||||||
|
// Bridge events so we can observe completion and shut down automatically.
|
||||||
|
let (tx_bridge, rx_bridge) = async_channel::bounded(SUBMISSION_CHANNEL_CAPACITY);
|
||||||
|
let ops_tx = io.tx_sub.clone();
|
||||||
|
let io_for_bridge = io;
|
||||||
|
tokio::spawn(async move {
|
||||||
|
while let Ok(event) = io_for_bridge.next_event().await {
|
||||||
|
let should_shutdown = matches!(
|
||||||
|
event.msg,
|
||||||
|
EventMsg::TaskComplete(_) | EventMsg::TurnAborted(_)
|
||||||
|
);
|
||||||
|
let _ = tx_bridge.send(event).await;
|
||||||
|
if should_shutdown {
|
||||||
|
let _ = ops_tx
|
||||||
|
.send(Submission {
|
||||||
|
id: "shutdown".to_string(),
|
||||||
|
op: Op::Shutdown {},
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
child_cancel.cancel();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// For one-shot usage, return a closed `tx_sub` so callers cannot submit
|
||||||
|
// additional ops after the initial request. Create a channel and drop the
|
||||||
|
// receiver to close it immediately.
|
||||||
|
let (tx_closed, rx_closed) = async_channel::bounded(SUBMISSION_CHANNEL_CAPACITY);
|
||||||
|
drop(rx_closed);
|
||||||
|
|
||||||
|
Ok(Codex {
|
||||||
|
next_id: AtomicU64::new(0),
|
||||||
|
rx_event: rx_bridge,
|
||||||
|
tx_sub: tx_closed,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn forward_events(
|
||||||
|
codex: Arc<Codex>,
|
||||||
|
tx_sub: Sender<Event>,
|
||||||
|
parent_session: Arc<Session>,
|
||||||
|
parent_ctx: Arc<TurnContext>,
|
||||||
|
cancel_token: CancellationToken,
|
||||||
|
) {
|
||||||
|
while let Ok(event) = codex.next_event().await {
|
||||||
|
match event {
|
||||||
|
Event {
|
||||||
|
id: _,
|
||||||
|
msg: EventMsg::SessionConfigured(_),
|
||||||
|
} => continue,
|
||||||
|
Event {
|
||||||
|
id,
|
||||||
|
msg: EventMsg::ExecApprovalRequest(event),
|
||||||
|
} => {
|
||||||
|
// Initiate approval via parent session; do not surface to consumer.
|
||||||
|
handle_exec_approval(
|
||||||
|
&codex,
|
||||||
|
id,
|
||||||
|
&parent_session,
|
||||||
|
&parent_ctx,
|
||||||
|
event,
|
||||||
|
&cancel_token,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
Event {
|
||||||
|
id,
|
||||||
|
msg: EventMsg::ApplyPatchApprovalRequest(event),
|
||||||
|
} => {
|
||||||
|
handle_patch_approval(
|
||||||
|
&codex,
|
||||||
|
id,
|
||||||
|
&parent_session,
|
||||||
|
&parent_ctx,
|
||||||
|
event,
|
||||||
|
&cancel_token,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
other => {
|
||||||
|
let _ = tx_sub.send(other).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Forward ops from a caller to a sub-agent, respecting cancellation.
|
||||||
|
async fn forward_ops(
|
||||||
|
codex: Arc<Codex>,
|
||||||
|
rx_ops: Receiver<Submission>,
|
||||||
|
cancel_token_ops: CancellationToken,
|
||||||
|
) {
|
||||||
|
loop {
|
||||||
|
let op: Op = match rx_ops.recv().or_cancel(&cancel_token_ops).await {
|
||||||
|
Ok(Ok(Submission { id: _, op })) => op,
|
||||||
|
Ok(Err(_)) | Err(_) => break,
|
||||||
|
};
|
||||||
|
let _ = codex.submit(op).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Handle an ExecApprovalRequest by consulting the parent session and replying.
|
||||||
|
async fn handle_exec_approval(
|
||||||
|
codex: &Codex,
|
||||||
|
id: String,
|
||||||
|
parent_session: &Session,
|
||||||
|
parent_ctx: &TurnContext,
|
||||||
|
event: ExecApprovalRequestEvent,
|
||||||
|
cancel_token: &CancellationToken,
|
||||||
|
) {
|
||||||
|
// Race approval with cancellation and timeout to avoid hangs.
|
||||||
|
let approval_fut = parent_session.request_command_approval(
|
||||||
|
parent_ctx,
|
||||||
|
parent_ctx.sub_id.clone(),
|
||||||
|
event.command,
|
||||||
|
event.cwd,
|
||||||
|
event.reason,
|
||||||
|
event.risk,
|
||||||
|
);
|
||||||
|
let decision = await_approval_with_cancel(
|
||||||
|
approval_fut,
|
||||||
|
parent_session,
|
||||||
|
&parent_ctx.sub_id,
|
||||||
|
cancel_token,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
let _ = codex.submit(Op::ExecApproval { id, decision }).await;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Handle an ApplyPatchApprovalRequest by consulting the parent session and replying.
|
||||||
|
async fn handle_patch_approval(
|
||||||
|
codex: &Codex,
|
||||||
|
id: String,
|
||||||
|
parent_session: &Session,
|
||||||
|
parent_ctx: &TurnContext,
|
||||||
|
event: ApplyPatchApprovalRequestEvent,
|
||||||
|
cancel_token: &CancellationToken,
|
||||||
|
) {
|
||||||
|
let decision_rx = parent_session
|
||||||
|
.request_patch_approval(
|
||||||
|
parent_ctx,
|
||||||
|
parent_ctx.sub_id.clone(),
|
||||||
|
event.changes,
|
||||||
|
event.reason,
|
||||||
|
event.grant_root,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
let decision = await_approval_with_cancel(
|
||||||
|
async move { decision_rx.await.unwrap_or_default() },
|
||||||
|
parent_session,
|
||||||
|
&parent_ctx.sub_id,
|
||||||
|
cancel_token,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
let _ = codex.submit(Op::PatchApproval { id, decision }).await;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Await an approval decision, aborting on cancellation.
|
||||||
|
async fn await_approval_with_cancel<F>(
|
||||||
|
fut: F,
|
||||||
|
parent_session: &Session,
|
||||||
|
sub_id: &str,
|
||||||
|
cancel_token: &CancellationToken,
|
||||||
|
) -> codex_protocol::protocol::ReviewDecision
|
||||||
|
where
|
||||||
|
F: core::future::Future<Output = codex_protocol::protocol::ReviewDecision>,
|
||||||
|
{
|
||||||
|
tokio::select! {
|
||||||
|
biased;
|
||||||
|
_ = cancel_token.cancelled() => {
|
||||||
|
parent_session
|
||||||
|
.notify_approval(sub_id, codex_protocol::protocol::ReviewDecision::Abort)
|
||||||
|
.await;
|
||||||
|
codex_protocol::protocol::ReviewDecision::Abort
|
||||||
|
}
|
||||||
|
decision = fut => {
|
||||||
|
decision
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -73,7 +73,7 @@ impl ConversationManager {
|
|||||||
config,
|
config,
|
||||||
auth_manager,
|
auth_manager,
|
||||||
InitialHistory::New,
|
InitialHistory::New,
|
||||||
self.session_source,
|
self.session_source.clone(),
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
self.finalize_spawn(codex, conversation_id).await
|
self.finalize_spawn(codex, conversation_id).await
|
||||||
@@ -145,7 +145,13 @@ impl ConversationManager {
|
|||||||
let CodexSpawnOk {
|
let CodexSpawnOk {
|
||||||
codex,
|
codex,
|
||||||
conversation_id,
|
conversation_id,
|
||||||
} = Codex::spawn(config, auth_manager, initial_history, self.session_source).await?;
|
} = Codex::spawn(
|
||||||
|
config,
|
||||||
|
auth_manager,
|
||||||
|
initial_history,
|
||||||
|
self.session_source.clone(),
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
self.finalize_spawn(codex, conversation_id).await
|
self.finalize_spawn(codex, conversation_id).await
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -179,7 +185,7 @@ impl ConversationManager {
|
|||||||
let CodexSpawnOk {
|
let CodexSpawnOk {
|
||||||
codex,
|
codex,
|
||||||
conversation_id,
|
conversation_id,
|
||||||
} = Codex::spawn(config, auth_manager, history, self.session_source).await?;
|
} = Codex::spawn(config, auth_manager, history, self.session_source.clone()).await?;
|
||||||
|
|
||||||
self.finalize_spawn(codex, conversation_id).await
|
self.finalize_spawn(codex, conversation_id).await
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -122,8 +122,9 @@ impl Features {
|
|||||||
self.enabled.insert(f);
|
self.enabled.insert(f);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn disable(&mut self, f: Feature) {
|
pub fn disable(&mut self, f: Feature) -> &mut Self {
|
||||||
self.enabled.remove(&f);
|
self.enabled.remove(&f);
|
||||||
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn record_legacy_usage_force(&mut self, alias: &str, feature: Feature) {
|
pub fn record_legacy_usage_force(&mut self, alias: &str, feature: Feature) {
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ mod client_common;
|
|||||||
pub mod codex;
|
pub mod codex;
|
||||||
mod codex_conversation;
|
mod codex_conversation;
|
||||||
pub use codex_conversation::CodexConversation;
|
pub use codex_conversation::CodexConversation;
|
||||||
|
mod codex_delegate;
|
||||||
mod command_safety;
|
mod command_safety;
|
||||||
pub mod config;
|
pub mod config;
|
||||||
pub mod config_edit;
|
pub mod config_edit;
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
use crate::codex::Session;
|
use crate::codex::Session;
|
||||||
use crate::codex::TurnContext;
|
use crate::codex::TurnContext;
|
||||||
use crate::conversation_history::ConversationHistory;
|
|
||||||
use codex_protocol::models::FunctionCallOutputPayload;
|
use codex_protocol::models::FunctionCallOutputPayload;
|
||||||
use codex_protocol::models::ResponseInputItem;
|
use codex_protocol::models::ResponseInputItem;
|
||||||
use codex_protocol::models::ResponseItem;
|
use codex_protocol::models::ResponseItem;
|
||||||
@@ -11,8 +10,6 @@ use tracing::warn;
|
|||||||
/// - `ResponseInputItem`s to send back to the model on the next turn.
|
/// - `ResponseInputItem`s to send back to the model on the next turn.
|
||||||
pub(crate) async fn process_items(
|
pub(crate) async fn process_items(
|
||||||
processed_items: Vec<crate::codex::ProcessedResponseItem>,
|
processed_items: Vec<crate::codex::ProcessedResponseItem>,
|
||||||
is_review_mode: bool,
|
|
||||||
review_thread_history: &mut ConversationHistory,
|
|
||||||
sess: &Session,
|
sess: &Session,
|
||||||
turn_context: &TurnContext,
|
turn_context: &TurnContext,
|
||||||
) -> (Vec<ResponseInputItem>, Vec<ResponseItem>) {
|
) -> (Vec<ResponseInputItem>, Vec<ResponseItem>) {
|
||||||
@@ -100,12 +97,8 @@ pub(crate) async fn process_items(
|
|||||||
|
|
||||||
// Only attempt to take the lock if there is something to record.
|
// Only attempt to take the lock if there is something to record.
|
||||||
if !items_to_record_in_conversation_history.is_empty() {
|
if !items_to_record_in_conversation_history.is_empty() {
|
||||||
if is_review_mode {
|
sess.record_conversation_items(turn_context, &items_to_record_in_conversation_history)
|
||||||
review_thread_history.record_items(items_to_record_in_conversation_history.iter());
|
.await;
|
||||||
} else {
|
|
||||||
sess.record_conversation_items(turn_context, &items_to_record_in_conversation_history)
|
|
||||||
.await;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
(responses, items_to_record_in_conversation_history)
|
(responses, items_to_record_in_conversation_history)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -409,7 +409,7 @@ async fn read_head_and_tail(
|
|||||||
|
|
||||||
match rollout_line.item {
|
match rollout_line.item {
|
||||||
RolloutItem::SessionMeta(session_meta_line) => {
|
RolloutItem::SessionMeta(session_meta_line) => {
|
||||||
summary.source = Some(session_meta_line.meta.source);
|
summary.source = Some(session_meta_line.meta.source.clone());
|
||||||
summary.model_provider = session_meta_line.meta.model_provider.clone();
|
summary.model_provider = session_meta_line.meta.model_provider.clone();
|
||||||
summary.created_at = summary
|
summary.created_at = summary
|
||||||
.created_at
|
.created_at
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ use codex_protocol::ConversationId;
|
|||||||
use codex_protocol::models::ContentItem;
|
use codex_protocol::models::ContentItem;
|
||||||
use codex_protocol::models::ResponseItem;
|
use codex_protocol::models::ResponseItem;
|
||||||
use codex_protocol::protocol::SandboxCommandAssessment;
|
use codex_protocol::protocol::SandboxCommandAssessment;
|
||||||
|
use codex_protocol::protocol::SessionSource;
|
||||||
use futures::StreamExt;
|
use futures::StreamExt;
|
||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
use tokio::time::timeout;
|
use tokio::time::timeout;
|
||||||
@@ -53,6 +54,7 @@ pub(crate) async fn assess_command(
|
|||||||
auth_manager: Arc<AuthManager>,
|
auth_manager: Arc<AuthManager>,
|
||||||
parent_otel: &OtelEventManager,
|
parent_otel: &OtelEventManager,
|
||||||
conversation_id: ConversationId,
|
conversation_id: ConversationId,
|
||||||
|
session_source: SessionSource,
|
||||||
call_id: &str,
|
call_id: &str,
|
||||||
command: &[String],
|
command: &[String],
|
||||||
sandbox_policy: &SandboxPolicy,
|
sandbox_policy: &SandboxPolicy,
|
||||||
@@ -141,6 +143,7 @@ pub(crate) async fn assess_command(
|
|||||||
config.model_reasoning_effort,
|
config.model_reasoning_effort,
|
||||||
config.model_reasoning_summary,
|
config.model_reasoning_summary,
|
||||||
conversation_id,
|
conversation_id,
|
||||||
|
session_source,
|
||||||
);
|
);
|
||||||
|
|
||||||
let start = Instant::now();
|
let start = Instant::now();
|
||||||
|
|||||||
@@ -37,16 +37,6 @@ pub(crate) enum TaskKind {
|
|||||||
Compact,
|
Compact,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl TaskKind {
|
|
||||||
pub(crate) fn header_value(self) -> &'static str {
|
|
||||||
match self {
|
|
||||||
TaskKind::Regular => "standard",
|
|
||||||
TaskKind::Review => "review",
|
|
||||||
TaskKind::Compact => "compact",
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub(crate) struct RunningTask {
|
pub(crate) struct RunningTask {
|
||||||
pub(crate) done: Arc<Notify>,
|
pub(crate) done: Arc<Notify>,
|
||||||
@@ -123,15 +113,3 @@ impl ActiveTurn {
|
|||||||
ts.clear_pending();
|
ts.clear_pending();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests {
|
|
||||||
use super::TaskKind;
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn header_value_matches_expected_labels() {
|
|
||||||
assert_eq!(TaskKind::Regular.header_value(), "standard");
|
|
||||||
assert_eq!(TaskKind::Review.header_value(), "review");
|
|
||||||
assert_eq!(TaskKind::Compact.header_value(), "compact");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ use tokio_util::task::AbortOnDropHandle;
|
|||||||
use tracing::trace;
|
use tracing::trace;
|
||||||
use tracing::warn;
|
use tracing::warn;
|
||||||
|
|
||||||
|
use crate::AuthManager;
|
||||||
use crate::codex::Session;
|
use crate::codex::Session;
|
||||||
use crate::codex::TurnContext;
|
use crate::codex::TurnContext;
|
||||||
use crate::protocol::EventMsg;
|
use crate::protocol::EventMsg;
|
||||||
@@ -50,6 +51,10 @@ impl SessionTaskContext {
|
|||||||
pub(crate) fn clone_session(&self) -> Arc<Session> {
|
pub(crate) fn clone_session(&self) -> Arc<Session> {
|
||||||
Arc::clone(&self.session)
|
Arc::clone(&self.session)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) fn auth_manager(&self) -> Arc<AuthManager> {
|
||||||
|
Arc::clone(&self.session.services.auth_manager)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Async task that drives a [`Session`] turn.
|
/// Async task that drives a [`Session`] turn.
|
||||||
@@ -123,7 +128,7 @@ impl Session {
|
|||||||
task_cancellation_token.child_token(),
|
task_cancellation_token.child_token(),
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
|
session_ctx.clone_session().flush_rollout().await;
|
||||||
if !task_cancellation_token.is_cancelled() {
|
if !task_cancellation_token.is_cancelled() {
|
||||||
// Emit completion uniformly from spawn site so all tasks share the same lifecycle.
|
// Emit completion uniformly from spawn site so all tasks share the same lifecycle.
|
||||||
let sess = session_ctx.clone_session();
|
let sess = session_ctx.clone_session();
|
||||||
|
|||||||
@@ -28,6 +28,6 @@ impl SessionTask for RegularTask {
|
|||||||
cancellation_token: CancellationToken,
|
cancellation_token: CancellationToken,
|
||||||
) -> Option<String> {
|
) -> Option<String> {
|
||||||
let sess = session.clone_session();
|
let sess = session.clone_session();
|
||||||
run_task(sess, ctx, input, TaskKind::Regular, cancellation_token).await
|
run_task(sess, ctx, input, cancellation_token).await
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,11 +1,18 @@
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
|
use codex_protocol::models::ContentItem;
|
||||||
|
use codex_protocol::models::ResponseItem;
|
||||||
|
use codex_protocol::protocol::Event;
|
||||||
|
use codex_protocol::protocol::EventMsg;
|
||||||
|
use codex_protocol::protocol::ExitedReviewModeEvent;
|
||||||
|
use codex_protocol::protocol::ReviewOutputEvent;
|
||||||
use tokio_util::sync::CancellationToken;
|
use tokio_util::sync::CancellationToken;
|
||||||
|
|
||||||
|
use crate::codex::Session;
|
||||||
use crate::codex::TurnContext;
|
use crate::codex::TurnContext;
|
||||||
use crate::codex::exit_review_mode;
|
use crate::codex_delegate::run_codex_conversation_one_shot;
|
||||||
use crate::codex::run_task;
|
use crate::review_format::format_review_findings_block;
|
||||||
use crate::state::TaskKind;
|
use crate::state::TaskKind;
|
||||||
use codex_protocol::user_input::UserInput;
|
use codex_protocol::user_input::UserInput;
|
||||||
|
|
||||||
@@ -28,11 +35,162 @@ impl SessionTask for ReviewTask {
|
|||||||
input: Vec<UserInput>,
|
input: Vec<UserInput>,
|
||||||
cancellation_token: CancellationToken,
|
cancellation_token: CancellationToken,
|
||||||
) -> Option<String> {
|
) -> Option<String> {
|
||||||
let sess = session.clone_session();
|
// Start sub-codex conversation and get the receiver for events.
|
||||||
run_task(sess, ctx, input, TaskKind::Review, cancellation_token).await
|
let output = match start_review_conversation(
|
||||||
|
session.clone(),
|
||||||
|
ctx.clone(),
|
||||||
|
input,
|
||||||
|
cancellation_token.clone(),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
Some(receiver) => process_review_events(session.clone(), ctx.clone(), receiver).await,
|
||||||
|
None => None,
|
||||||
|
};
|
||||||
|
if !cancellation_token.is_cancelled() {
|
||||||
|
exit_review_mode(session.clone_session(), output.clone(), ctx.clone()).await;
|
||||||
|
}
|
||||||
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn abort(&self, session: Arc<SessionTaskContext>, ctx: Arc<TurnContext>) {
|
async fn abort(&self, session: Arc<SessionTaskContext>, ctx: Arc<TurnContext>) {
|
||||||
exit_review_mode(session.clone_session(), ctx, None).await;
|
exit_review_mode(session.clone_session(), None, ctx).await;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn start_review_conversation(
|
||||||
|
session: Arc<SessionTaskContext>,
|
||||||
|
ctx: Arc<TurnContext>,
|
||||||
|
input: Vec<UserInput>,
|
||||||
|
cancellation_token: CancellationToken,
|
||||||
|
) -> Option<async_channel::Receiver<Event>> {
|
||||||
|
let config = ctx.client.config();
|
||||||
|
let mut sub_agent_config = config.as_ref().clone();
|
||||||
|
// Run with only reviewer rubric — drop outer user_instructions
|
||||||
|
sub_agent_config.user_instructions = None;
|
||||||
|
// Avoid loading project docs; reviewer only needs findings
|
||||||
|
sub_agent_config.project_doc_max_bytes = 0;
|
||||||
|
// Carry over review-only feature restrictions so the delegate cannot
|
||||||
|
// re-enable blocked tools (web search, view image, streamable shell).
|
||||||
|
sub_agent_config
|
||||||
|
.features
|
||||||
|
.disable(crate::features::Feature::WebSearchRequest)
|
||||||
|
.disable(crate::features::Feature::ViewImageTool)
|
||||||
|
.disable(crate::features::Feature::StreamableShell);
|
||||||
|
// Set explicit review rubric for the sub-agent
|
||||||
|
sub_agent_config.base_instructions = Some(crate::REVIEW_PROMPT.to_string());
|
||||||
|
(run_codex_conversation_one_shot(
|
||||||
|
sub_agent_config,
|
||||||
|
session.auth_manager(),
|
||||||
|
input,
|
||||||
|
session.clone_session(),
|
||||||
|
ctx.clone(),
|
||||||
|
cancellation_token,
|
||||||
|
)
|
||||||
|
.await)
|
||||||
|
.ok()
|
||||||
|
.map(|io| io.rx_event)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn process_review_events(
|
||||||
|
session: Arc<SessionTaskContext>,
|
||||||
|
ctx: Arc<TurnContext>,
|
||||||
|
receiver: async_channel::Receiver<Event>,
|
||||||
|
) -> Option<ReviewOutputEvent> {
|
||||||
|
let mut prev_agent_message: Option<Event> = None;
|
||||||
|
while let Ok(event) = receiver.recv().await {
|
||||||
|
match event.clone().msg {
|
||||||
|
EventMsg::AgentMessage(_) => {
|
||||||
|
if let Some(prev) = prev_agent_message.take() {
|
||||||
|
session
|
||||||
|
.clone_session()
|
||||||
|
.send_event(ctx.as_ref(), prev.msg)
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
prev_agent_message = Some(event);
|
||||||
|
}
|
||||||
|
EventMsg::TaskComplete(task_complete) => {
|
||||||
|
// Parse review output from the last agent message (if present).
|
||||||
|
let out = task_complete
|
||||||
|
.last_agent_message
|
||||||
|
.as_deref()
|
||||||
|
.map(parse_review_output_event);
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
EventMsg::TurnAborted(_) => {
|
||||||
|
// Cancellation or abort: consumer will finalize with None.
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
other => {
|
||||||
|
session
|
||||||
|
.clone_session()
|
||||||
|
.send_event(ctx.as_ref(), other)
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Channel closed without TaskComplete: treat as interrupted.
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse a ReviewOutputEvent from a text blob returned by the reviewer model.
|
||||||
|
/// If the text is valid JSON matching ReviewOutputEvent, deserialize it.
|
||||||
|
/// Otherwise, attempt to extract the first JSON object substring and parse it.
|
||||||
|
/// If parsing still fails, return a structured fallback carrying the plain text
|
||||||
|
/// in `overall_explanation`.
|
||||||
|
fn parse_review_output_event(text: &str) -> ReviewOutputEvent {
|
||||||
|
if let Ok(ev) = serde_json::from_str::<ReviewOutputEvent>(text) {
|
||||||
|
return ev;
|
||||||
|
}
|
||||||
|
if let (Some(start), Some(end)) = (text.find('{'), text.rfind('}'))
|
||||||
|
&& start < end
|
||||||
|
&& let Some(slice) = text.get(start..=end)
|
||||||
|
&& let Ok(ev) = serde_json::from_str::<ReviewOutputEvent>(slice)
|
||||||
|
{
|
||||||
|
return ev;
|
||||||
|
}
|
||||||
|
ReviewOutputEvent {
|
||||||
|
overall_explanation: text.to_string(),
|
||||||
|
..Default::default()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Emits an ExitedReviewMode Event with optional ReviewOutput,
|
||||||
|
/// and records a developer message with the review output.
|
||||||
|
pub(crate) async fn exit_review_mode(
|
||||||
|
session: Arc<Session>,
|
||||||
|
review_output: Option<ReviewOutputEvent>,
|
||||||
|
ctx: Arc<TurnContext>,
|
||||||
|
) {
|
||||||
|
let user_message = if let Some(out) = review_output.clone() {
|
||||||
|
let mut findings_str = String::new();
|
||||||
|
let text = out.overall_explanation.trim();
|
||||||
|
if !text.is_empty() {
|
||||||
|
findings_str.push_str(text);
|
||||||
|
}
|
||||||
|
if !out.findings.is_empty() {
|
||||||
|
let block = format_review_findings_block(&out.findings, None);
|
||||||
|
findings_str.push_str(&format!("\n{block}"));
|
||||||
|
}
|
||||||
|
crate::client_common::REVIEW_EXIT_SUCCESS_TMPL.replace("{results}", &findings_str)
|
||||||
|
} else {
|
||||||
|
crate::client_common::REVIEW_EXIT_INTERRUPTED_TMPL.to_string()
|
||||||
|
};
|
||||||
|
|
||||||
|
session
|
||||||
|
.record_conversation_items(
|
||||||
|
&ctx,
|
||||||
|
&[ResponseItem::Message {
|
||||||
|
id: None,
|
||||||
|
role: "user".to_string(),
|
||||||
|
content: vec![ContentItem::InputText { text: user_message }],
|
||||||
|
}],
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
session
|
||||||
|
.send_event(
|
||||||
|
ctx.as_ref(),
|
||||||
|
EventMsg::ExitedReviewMode(ExitedReviewModeEvent { review_output }),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
|||||||
8
codex-rs/core/templates/review/exit_interrupted.xml
Normal file
8
codex-rs/core/templates/review/exit_interrupted.xml
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
<user_action>
|
||||||
|
<context>User initiated a review task, but was interrupted. If user asks about this, tell them to re-initiate a review with `/review` and wait for it to complete.</context>
|
||||||
|
<action>review</action>
|
||||||
|
<results>
|
||||||
|
None.
|
||||||
|
</results>
|
||||||
|
</user_action>
|
||||||
|
|
||||||
8
codex-rs/core/templates/review/exit_success.xml
Normal file
8
codex-rs/core/templates/review/exit_success.xml
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
<user_action>
|
||||||
|
<context>User initiated a review task. Here's the full review output from reviewer model. User may select one or more comments to resolve.</context>
|
||||||
|
<action>review</action>
|
||||||
|
<results>
|
||||||
|
{results}
|
||||||
|
</results>
|
||||||
|
</user_action>
|
||||||
|
|
||||||
@@ -0,0 +1,8 @@
|
|||||||
|
<user_action>
|
||||||
|
<context>User initiated a review task. Here's the full review output from reviewer model. User may select one or more comments to resolve.</context>
|
||||||
|
<action>review</action>
|
||||||
|
<results>
|
||||||
|
{findings}
|
||||||
|
</results>
|
||||||
|
</user_action>
|
||||||
|
|
||||||
@@ -0,0 +1,8 @@
|
|||||||
|
<user_action>
|
||||||
|
<context>User initiated a review task, but was interrupted. If user asks about this, tell them to re-initiate a review with `/review` and wait for it to complete.</context>
|
||||||
|
<action>review</action>
|
||||||
|
<results>
|
||||||
|
None.
|
||||||
|
</results>
|
||||||
|
</user_action>
|
||||||
|
|
||||||
@@ -94,6 +94,7 @@ async fn run_request(input: Vec<ResponseItem>) -> Value {
|
|||||||
effort,
|
effort,
|
||||||
summary,
|
summary,
|
||||||
conversation_id,
|
conversation_id,
|
||||||
|
codex_protocol::protocol::SessionSource::Exec,
|
||||||
);
|
);
|
||||||
|
|
||||||
let mut prompt = Prompt::default();
|
let mut prompt = Prompt::default();
|
||||||
|
|||||||
@@ -94,6 +94,7 @@ async fn run_stream_with_bytes(sse_body: &[u8]) -> Vec<ResponseEvent> {
|
|||||||
effort,
|
effort,
|
||||||
summary,
|
summary,
|
||||||
conversation_id,
|
conversation_id,
|
||||||
|
codex_protocol::protocol::SessionSource::Exec,
|
||||||
);
|
);
|
||||||
|
|
||||||
let mut prompt = Prompt::default();
|
let mut prompt = Prompt::default();
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ use codex_core::ResponseItem;
|
|||||||
use codex_core::WireApi;
|
use codex_core::WireApi;
|
||||||
use codex_otel::otel_event_manager::OtelEventManager;
|
use codex_otel::otel_event_manager::OtelEventManager;
|
||||||
use codex_protocol::ConversationId;
|
use codex_protocol::ConversationId;
|
||||||
|
use codex_protocol::protocol::SessionSource;
|
||||||
use core_test_support::load_default_config_for_test;
|
use core_test_support::load_default_config_for_test;
|
||||||
use core_test_support::responses;
|
use core_test_support::responses;
|
||||||
use futures::StreamExt;
|
use futures::StreamExt;
|
||||||
@@ -26,12 +27,9 @@ async fn responses_stream_includes_task_type_header() {
|
|||||||
responses::ev_completed("resp-1"),
|
responses::ev_completed("resp-1"),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
let request_recorder = responses::mount_sse_once_match(
|
let request_recorder =
|
||||||
&server,
|
responses::mount_sse_once_match(&server, header("Codex-Task-Type", "exec"), response_body)
|
||||||
header("Codex-Task-Type", "standard"),
|
.await;
|
||||||
response_body,
|
|
||||||
)
|
|
||||||
.await;
|
|
||||||
|
|
||||||
let provider = ModelProviderInfo {
|
let provider = ModelProviderInfo {
|
||||||
name: "mock".into(),
|
name: "mock".into(),
|
||||||
@@ -78,6 +76,7 @@ async fn responses_stream_includes_task_type_header() {
|
|||||||
effort,
|
effort,
|
||||||
summary,
|
summary,
|
||||||
conversation_id,
|
conversation_id,
|
||||||
|
SessionSource::Exec,
|
||||||
);
|
);
|
||||||
|
|
||||||
let mut prompt = Prompt::default();
|
let mut prompt = Prompt::default();
|
||||||
@@ -97,8 +96,5 @@ async fn responses_stream_includes_task_type_header() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let request = request_recorder.single_request();
|
let request = request_recorder.single_request();
|
||||||
assert_eq!(
|
assert_eq!(request.header("Codex-Task-Type").as_deref(), Some("exec"));
|
||||||
request.header("Codex-Task-Type").as_deref(),
|
|
||||||
Some("standard")
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -675,6 +675,7 @@ async fn azure_responses_request_includes_store_and_reasoning_ids() {
|
|||||||
effort,
|
effort,
|
||||||
summary,
|
summary,
|
||||||
conversation_id,
|
conversation_id,
|
||||||
|
codex_protocol::protocol::SessionSource::Exec,
|
||||||
);
|
);
|
||||||
|
|
||||||
let mut prompt = Prompt::default();
|
let mut prompt = Prompt::default();
|
||||||
|
|||||||
173
codex-rs/core/tests/suite/codex_delegate.rs
Normal file
173
codex-rs/core/tests/suite/codex_delegate.rs
Normal file
@@ -0,0 +1,173 @@
|
|||||||
|
use codex_core::protocol::AskForApproval;
|
||||||
|
use codex_core::protocol::EventMsg;
|
||||||
|
use codex_core::protocol::Op;
|
||||||
|
use codex_core::protocol::ReviewDecision;
|
||||||
|
use codex_core::protocol::ReviewRequest;
|
||||||
|
use codex_core::protocol::SandboxPolicy;
|
||||||
|
use core_test_support::responses::ev_apply_patch_function_call;
|
||||||
|
use core_test_support::responses::ev_assistant_message;
|
||||||
|
use core_test_support::responses::ev_completed;
|
||||||
|
use core_test_support::responses::ev_function_call;
|
||||||
|
use core_test_support::responses::ev_response_created;
|
||||||
|
use core_test_support::responses::mount_sse_sequence;
|
||||||
|
use core_test_support::responses::sse;
|
||||||
|
use core_test_support::responses::start_mock_server;
|
||||||
|
use core_test_support::skip_if_no_network;
|
||||||
|
use core_test_support::test_codex::test_codex;
|
||||||
|
use core_test_support::wait_for_event;
|
||||||
|
|
||||||
|
/// Delegate should surface ExecApprovalRequest from sub-agent and proceed
|
||||||
|
/// after parent submits an approval decision.
|
||||||
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||||
|
async fn codex_delegate_forwards_exec_approval_and_proceeds_on_approval() {
|
||||||
|
skip_if_no_network!();
|
||||||
|
|
||||||
|
// Sub-agent turn 1: emit a shell function_call requiring approval, then complete.
|
||||||
|
let call_id = "call-exec-1";
|
||||||
|
let args = serde_json::json!({
|
||||||
|
"command": ["bash", "-lc", "rm -rf delegated"],
|
||||||
|
"timeout_ms": 1000,
|
||||||
|
"with_escalated_permissions": true,
|
||||||
|
})
|
||||||
|
.to_string();
|
||||||
|
let sse1 = sse(vec![
|
||||||
|
ev_response_created("resp-1"),
|
||||||
|
ev_function_call(call_id, "shell", &args),
|
||||||
|
ev_completed("resp-1"),
|
||||||
|
]);
|
||||||
|
|
||||||
|
// Sub-agent turn 2: return structured review output and complete.
|
||||||
|
let review_json = serde_json::json!({
|
||||||
|
"findings": [],
|
||||||
|
"overall_correctness": "ok",
|
||||||
|
"overall_explanation": "delegate approved exec",
|
||||||
|
"overall_confidence_score": 0.5
|
||||||
|
})
|
||||||
|
.to_string();
|
||||||
|
let sse2 = sse(vec![
|
||||||
|
ev_response_created("resp-2"),
|
||||||
|
ev_assistant_message("msg-1", &review_json),
|
||||||
|
ev_completed("resp-2"),
|
||||||
|
]);
|
||||||
|
|
||||||
|
let server = start_mock_server().await;
|
||||||
|
mount_sse_sequence(&server, vec![sse1, sse2]).await;
|
||||||
|
|
||||||
|
// Build a conversation configured to require approvals so the delegate
|
||||||
|
// routes ExecApprovalRequest via the parent.
|
||||||
|
let mut builder = test_codex().with_config(|config| {
|
||||||
|
config.approval_policy = AskForApproval::OnRequest;
|
||||||
|
config.sandbox_policy = SandboxPolicy::ReadOnly;
|
||||||
|
});
|
||||||
|
let test = builder.build(&server).await.expect("build test codex");
|
||||||
|
|
||||||
|
// Kick off review (sub-agent starts internally).
|
||||||
|
test.codex
|
||||||
|
.submit(Op::Review {
|
||||||
|
review_request: ReviewRequest {
|
||||||
|
prompt: "Please review".to_string(),
|
||||||
|
user_facing_hint: "review".to_string(),
|
||||||
|
},
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.expect("submit review");
|
||||||
|
|
||||||
|
// Lifecycle: Entered -> ExecApprovalRequest -> Exited(Some) -> TaskComplete.
|
||||||
|
wait_for_event(&test.codex, |ev| {
|
||||||
|
matches!(ev, EventMsg::EnteredReviewMode(_))
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
|
||||||
|
// Expect parent-side approval request (forwarded by delegate).
|
||||||
|
wait_for_event(&test.codex, |ev| {
|
||||||
|
matches!(ev, EventMsg::ExecApprovalRequest(_))
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
|
||||||
|
// Approve via parent; id "0" is the active sub_id in tests.
|
||||||
|
test.codex
|
||||||
|
.submit(Op::ExecApproval {
|
||||||
|
id: "0".into(),
|
||||||
|
decision: ReviewDecision::Approved,
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.expect("submit exec approval");
|
||||||
|
|
||||||
|
wait_for_event(&test.codex, |ev| {
|
||||||
|
matches!(ev, EventMsg::ExitedReviewMode(_))
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Delegate should surface ApplyPatchApprovalRequest and honor parent decision
|
||||||
|
/// so the sub-agent can proceed to completion.
|
||||||
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||||
|
async fn codex_delegate_forwards_patch_approval_and_proceeds_on_decision() {
|
||||||
|
skip_if_no_network!();
|
||||||
|
|
||||||
|
let call_id = "call-patch-1";
|
||||||
|
let patch = "*** Begin Patch\n*** Add File: delegated.txt\n+hello\n*** End Patch\n";
|
||||||
|
let sse1 = sse(vec![
|
||||||
|
ev_response_created("resp-1"),
|
||||||
|
ev_apply_patch_function_call(call_id, patch),
|
||||||
|
ev_completed("resp-1"),
|
||||||
|
]);
|
||||||
|
let review_json = serde_json::json!({
|
||||||
|
"findings": [],
|
||||||
|
"overall_correctness": "ok",
|
||||||
|
"overall_explanation": "delegate patch handled",
|
||||||
|
"overall_confidence_score": 0.5
|
||||||
|
})
|
||||||
|
.to_string();
|
||||||
|
let sse2 = sse(vec![
|
||||||
|
ev_response_created("resp-2"),
|
||||||
|
ev_assistant_message("msg-1", &review_json),
|
||||||
|
ev_completed("resp-2"),
|
||||||
|
]);
|
||||||
|
|
||||||
|
let server = start_mock_server().await;
|
||||||
|
mount_sse_sequence(&server, vec![sse1, sse2]).await;
|
||||||
|
|
||||||
|
let mut builder = test_codex().with_config(|config| {
|
||||||
|
config.approval_policy = AskForApproval::OnRequest;
|
||||||
|
// Use a restricted sandbox so patch approval is required
|
||||||
|
config.sandbox_policy = SandboxPolicy::ReadOnly;
|
||||||
|
config.include_apply_patch_tool = true;
|
||||||
|
});
|
||||||
|
let test = builder.build(&server).await.expect("build test codex");
|
||||||
|
|
||||||
|
test.codex
|
||||||
|
.submit(Op::Review {
|
||||||
|
review_request: ReviewRequest {
|
||||||
|
prompt: "Please review".to_string(),
|
||||||
|
user_facing_hint: "review".to_string(),
|
||||||
|
},
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.expect("submit review");
|
||||||
|
|
||||||
|
wait_for_event(&test.codex, |ev| {
|
||||||
|
matches!(ev, EventMsg::EnteredReviewMode(_))
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
wait_for_event(&test.codex, |ev| {
|
||||||
|
matches!(ev, EventMsg::ApplyPatchApprovalRequest(_))
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
|
||||||
|
// Deny via parent so delegate can continue; id "0" is the active sub_id in tests.
|
||||||
|
test.codex
|
||||||
|
.submit(Op::PatchApproval {
|
||||||
|
id: "0".into(),
|
||||||
|
decision: ReviewDecision::Denied,
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.expect("submit patch approval");
|
||||||
|
|
||||||
|
wait_for_event(&test.codex, |ev| {
|
||||||
|
matches!(ev, EventMsg::ExitedReviewMode(_))
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||||
|
}
|
||||||
@@ -8,6 +8,7 @@ mod apply_patch_cli;
|
|||||||
mod approvals;
|
mod approvals;
|
||||||
mod cli_stream;
|
mod cli_stream;
|
||||||
mod client;
|
mod client;
|
||||||
|
mod codex_delegate;
|
||||||
mod compact;
|
mod compact;
|
||||||
mod compact_resume_fork;
|
mod compact_resume_fork;
|
||||||
mod deprecation_notice;
|
mod deprecation_notice;
|
||||||
|
|||||||
@@ -950,7 +950,7 @@ impl InitialHistory {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, Copy, Clone, Debug, PartialEq, Eq, JsonSchema, TS, Default)]
|
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, JsonSchema, TS, Default)]
|
||||||
#[serde(rename_all = "lowercase")]
|
#[serde(rename_all = "lowercase")]
|
||||||
#[ts(rename_all = "lowercase")]
|
#[ts(rename_all = "lowercase")]
|
||||||
pub enum SessionSource {
|
pub enum SessionSource {
|
||||||
@@ -959,10 +959,18 @@ pub enum SessionSource {
|
|||||||
VSCode,
|
VSCode,
|
||||||
Exec,
|
Exec,
|
||||||
Mcp,
|
Mcp,
|
||||||
|
SubAgent(SubAgentSource),
|
||||||
#[serde(other)]
|
#[serde(other)]
|
||||||
Unknown,
|
Unknown,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, JsonSchema, TS)]
|
||||||
|
pub enum SubAgentSource {
|
||||||
|
Review,
|
||||||
|
Compact,
|
||||||
|
Other(String),
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema, TS)]
|
#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema, TS)]
|
||||||
#[ts(optional_fields = nullable)]
|
#[ts(optional_fields = nullable)]
|
||||||
pub struct SessionMeta {
|
pub struct SessionMeta {
|
||||||
|
|||||||
Reference in New Issue
Block a user