Review mode core updates (#3701)
1. Adds the environment prompt (including cwd) to the review thread.
2. Prepends the review prompt as a user message (a temporary fix so the instructions are not replaced on the backend); see the sketch after this list.
3. Sets reasoning effort to low.
4. Sets the default review model to `gpt-5-codex`.
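Taken together, items 1 and 2 change what the first request of a review thread contains. The following is a minimal sketch of that seeding using stand-in types rather than the actual codex-rs API; `seed_review_thread`, `environment_context`, and the placeholder `REVIEW_PROMPT` value are illustrative only.

```rust
/// Minimal sketch, not the real codex-rs API: the review thread's first turn
/// now carries two user messages -- the environment context (which includes
/// the <cwd> tag) followed by the review instructions prepended to the
/// caller's task, matching the format! call in spawn_review_thread below.
fn seed_review_thread(environment_context: String, review_prompt: &str) -> Vec<String> {
    // Placeholder for the real REVIEW_PROMPT re-exported from client_common.
    const REVIEW_PROMPT: &str = "<review instructions>";
    vec![
        // 1. Environment context, e.g. "<environment_context><cwd>/repo</cwd>...".
        environment_context,
        // 2. Review instructions + task, combined into a single user message.
        format!("{REVIEW_PROMPT}\n\n---\n\nNow, here's your task: {review_prompt}"),
    ]
}
```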
@@ -1154,20 +1154,16 @@ impl AgentTask {
     fn abort(self, reason: TurnAbortReason) {
         // TOCTOU?
         if !self.handle.is_finished() {
-            if self.kind == AgentTaskKind::Review {
-                let sess = self.sess.clone();
-                let sub_id = self.sub_id.clone();
-                tokio::spawn(async move {
-                    exit_review_mode(sess, sub_id, None).await;
-                });
-            }
             self.handle.abort();
             let event = Event {
-                id: self.sub_id,
+                id: self.sub_id.clone(),
                 msg: EventMsg::TurnAborted(TurnAbortedEvent { reason }),
             };
             let sess = self.sess;
             tokio::spawn(async move {
+                if self.kind == AgentTaskKind::Review {
+                    exit_review_mode(sess.clone(), self.sub_id, None).await;
+                }
                 sess.send_event(event).await;
             });
         }
@@ -1560,7 +1556,8 @@ async fn spawn_review_thread(
         experimental_unified_exec_tool: config.use_experimental_unified_exec_tool,
     });
 
-    let base_instructions = Some(REVIEW_PROMPT.to_string());
+    let base_instructions = REVIEW_PROMPT.to_string();
+    let review_prompt = review_request.prompt.clone();
     let provider = parent_turn_context.client.get_provider();
     let auth_manager = parent_turn_context.client.get_auth_manager();
     let model_family = review_model_family.clone();
@@ -1569,16 +1566,19 @@ async fn spawn_review_thread(
     let mut per_turn_config = (*config).clone();
     per_turn_config.model = model.clone();
     per_turn_config.model_family = model_family.clone();
+    per_turn_config.model_reasoning_effort = Some(ReasoningEffortConfig::Low);
+    per_turn_config.model_reasoning_summary = ReasoningSummaryConfig::Detailed;
     if let Some(model_info) = get_model_info(&model_family) {
         per_turn_config.model_context_window = Some(model_info.context_window);
     }
 
+    let per_turn_config = Arc::new(per_turn_config);
     let client = ModelClient::new(
-        Arc::new(per_turn_config),
+        per_turn_config.clone(),
        auth_manager,
        provider,
-        parent_turn_context.client.get_reasoning_effort(),
-        parent_turn_context.client.get_reasoning_summary(),
+        per_turn_config.model_reasoning_effort,
+        per_turn_config.model_reasoning_summary,
        sess.conversation_id,
     );
 
@@ -1586,7 +1586,7 @@ async fn spawn_review_thread(
         client,
         tools_config,
         user_instructions: None,
-        base_instructions,
+        base_instructions: Some(base_instructions.clone()),
         approval_policy: parent_turn_context.approval_policy,
         sandbox_policy: parent_turn_context.sandbox_policy.clone(),
         shell_environment_policy: parent_turn_context.shell_environment_policy.clone(),
@@ -1596,7 +1596,7 @@ async fn spawn_review_thread(
 
     // Seed the child task with the review prompt as the initial user message.
     let input: Vec<InputItem> = vec![InputItem::Text {
-        text: review_request.prompt.clone(),
+        text: format!("{base_instructions}\n\n---\n\nNow, here's your task: {review_prompt}"),
     }];
     let tc = Arc::new(review_turn_context);
 
@@ -1654,6 +1654,8 @@ async fn run_task(
     let is_review_mode = turn_context.is_review_mode;
     let mut review_thread_history: Vec<ResponseItem> = Vec::new();
     if is_review_mode {
+        // Seed review threads with environment context so the model knows the working directory.
+        review_thread_history.extend(sess.build_initial_context(turn_context.as_ref()));
         review_thread_history.push(initial_input_for_turn.into());
     } else {
         sess.record_input_and_rollout_usermsg(&initial_input_for_turn)
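For item 3, the hunks above clone the parent config into a per-turn config before building the review model client. Below is a rough sketch of that override pattern with simplified stand-in types (the real fields live on codex-rs's per-turn config and the `ReasoningEffortConfig`/`ReasoningSummaryConfig` enums); it is not the actual implementation.

```rust
use std::sync::Arc;

// Simplified stand-ins for the real config enums and struct.
#[derive(Clone, Copy)]
enum ReasoningEffort { Low }
#[derive(Clone, Copy)]
enum ReasoningSummary { Detailed }

#[derive(Clone)]
struct PerTurnConfig {
    model: String,
    model_reasoning_effort: Option<ReasoningEffort>,
    model_reasoning_summary: ReasoningSummary,
}

// Clone the parent config, pin the review-specific settings, then wrap it in
// an Arc so the same values feed both the model client and the turn context.
fn review_overrides(parent: &PerTurnConfig, review_model: &str) -> Arc<PerTurnConfig> {
    let mut per_turn = parent.clone();
    per_turn.model = review_model.to_string();                       // e.g. "gpt-5-codex"
    per_turn.model_reasoning_effort = Some(ReasoningEffort::Low);    // low effort for reviews
    per_turn.model_reasoning_summary = ReasoningSummary::Detailed;   // but detailed summaries
    Arc::new(per_turn)
}
```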
@@ -38,7 +38,7 @@ use toml_edit::Item as TomlItem;
 use toml_edit::Table as TomlTable;
 
 const OPENAI_DEFAULT_MODEL: &str = "gpt-5";
-const OPENAI_DEFAULT_REVIEW_MODEL: &str = "gpt-5";
+const OPENAI_DEFAULT_REVIEW_MODEL: &str = "gpt-5-codex";
 pub const GPT_5_CODEX_MEDIUM_MODEL: &str = "gpt-5-codex";
 
 /// Maximum number of bytes of the documentation that will be embedded. Larger
@@ -1581,7 +1581,7 @@ model_verbosity = "high"
         assert_eq!(
             Config {
                 model: "o3".to_string(),
-                review_model: "gpt-5".to_string(),
+                review_model: OPENAI_DEFAULT_REVIEW_MODEL.to_string(),
                 model_family: find_family_for_model("o3").expect("known model slug"),
                 model_context_window: Some(200_000),
                 model_max_output_tokens: Some(100_000),
@@ -1639,7 +1639,7 @@ model_verbosity = "high"
         )?;
         let expected_gpt3_profile_config = Config {
             model: "gpt-3.5-turbo".to_string(),
-            review_model: "gpt-5".to_string(),
+            review_model: OPENAI_DEFAULT_REVIEW_MODEL.to_string(),
             model_family: find_family_for_model("gpt-3.5-turbo").expect("known model slug"),
             model_context_window: Some(16_385),
             model_max_output_tokens: Some(4_096),
@@ -1712,7 +1712,7 @@ model_verbosity = "high"
         )?;
         let expected_zdr_profile_config = Config {
             model: "o3".to_string(),
-            review_model: "gpt-5".to_string(),
+            review_model: OPENAI_DEFAULT_REVIEW_MODEL.to_string(),
             model_family: find_family_for_model("o3").expect("known model slug"),
             model_context_window: Some(200_000),
             model_max_output_tokens: Some(100_000),
@@ -1771,7 +1771,7 @@ model_verbosity = "high"
         )?;
         let expected_gpt5_profile_config = Config {
             model: "gpt-5".to_string(),
-            review_model: "gpt-5".to_string(),
+            review_model: OPENAI_DEFAULT_REVIEW_MODEL.to_string(),
             model_family: find_family_for_model("gpt-5").expect("known model slug"),
             model_context_window: Some(272_000),
             model_max_output_tokens: Some(128_000),
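For item 4, the constant change above only moves the default; the updated tests expect `review_model` to fall back to `OPENAI_DEFAULT_REVIEW_MODEL` when the profile does not set one. A hedged sketch of how that fallback is presumably applied when `Config` is assembled (the actual loading code is outside this diff, and `resolve_review_model` is a hypothetical helper, not part of codex-rs):

```rust
const OPENAI_DEFAULT_REVIEW_MODEL: &str = "gpt-5-codex";

// Hypothetical helper, shown only to illustrate the expected precedence of an
// explicitly configured review_model over the new default.
fn resolve_review_model(configured: Option<String>) -> String {
    configured.unwrap_or_else(|| OPENAI_DEFAULT_REVIEW_MODEL.to_string())
}
```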
@@ -88,6 +88,7 @@ pub use codex_protocol::config_types as protocol_config_types;
 
 pub use client::ModelClient;
 pub use client_common::Prompt;
+pub use client_common::REVIEW_PROMPT;
 pub use client_common::ResponseEvent;
 pub use client_common::ResponseStream;
 pub use codex_protocol::models::ContentItem;
@@ -2,8 +2,10 @@ use codex_core::CodexAuth;
 use codex_core::CodexConversation;
 use codex_core::ConversationManager;
 use codex_core::ModelProviderInfo;
+use codex_core::REVIEW_PROMPT;
 use codex_core::built_in_model_providers;
 use codex_core::config::Config;
+use codex_core::protocol::ENVIRONMENT_CONTEXT_OPEN_TAG;
 use codex_core::protocol::EventMsg;
 use codex_core::protocol::ExitedReviewModeEvent;
 use codex_core::protocol::InputItem;
@@ -419,17 +421,36 @@ async fn review_input_isolated_from_parent_history() {
         .await;
     let _complete = wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
 
-    // Assert the request `input` contains only the single review user message.
+    // Assert the request `input` contains the environment context followed by the review prompt.
     let request = &server.received_requests().await.unwrap()[0];
     let body = request.body_json::<serde_json::Value>().unwrap();
-    let expected_input = serde_json::json!([
-        {
-            "type": "message",
-            "role": "user",
-            "content": [{"type": "input_text", "text": review_prompt}]
-        }
-    ]);
-    assert_eq!(body["input"], expected_input);
+    let input = body["input"].as_array().expect("input array");
+    assert_eq!(
+        input.len(),
+        2,
+        "expected environment context and review prompt"
+    );
+
+    let env_msg = &input[0];
+    assert_eq!(env_msg["type"].as_str().unwrap(), "message");
+    assert_eq!(env_msg["role"].as_str().unwrap(), "user");
+    let env_text = env_msg["content"][0]["text"].as_str().expect("env text");
+    assert!(
+        env_text.starts_with(ENVIRONMENT_CONTEXT_OPEN_TAG),
+        "environment context must be the first item"
+    );
+    assert!(
+        env_text.contains("<cwd>"),
+        "environment context should include cwd"
+    );
+
+    let review_msg = &input[1];
+    assert_eq!(review_msg["type"].as_str().unwrap(), "message");
+    assert_eq!(review_msg["role"].as_str().unwrap(), "user");
+    assert_eq!(
+        review_msg["content"][0]["text"].as_str().unwrap(),
+        format!("{REVIEW_PROMPT}\n\n---\n\nNow, here's your task: Please review only this",)
+    );
+
     server.verify().await;
 }