Review Mode (Core) (#3401)
## 📝 Review Mode -- Core
This PR introduces the Core implementation for Review mode:
- New op `Op::Review { prompt: String }:` spawns a child review task
with isolated context, a review‑specific system prompt, and a
`Config.review_model`.
- `EnteredReviewMode`: emitted when the child review session starts.
Every event from this point onwards reflects the review session.
- `ExitedReviewMode(Option<ReviewOutputEvent>)`: emitted when the review
finishes or is interrupted, with optional structured findings:
```json
{
"findings": [
{
"title": "<≤ 80 chars, imperative>",
"body": "<valid Markdown explaining *why* this is a problem; cite files/lines/functions>",
"confidence_score": <float 0.0-1.0>,
"priority": <int 0-3>,
"code_location": {
"absolute_file_path": "<file path>",
"line_range": {"start": <int>, "end": <int>}
}
}
],
"overall_correctness": "patch is correct" | "patch is incorrect",
"overall_explanation": "<1-3 sentence explanation justifying the overall_correctness verdict>",
"overall_confidence_score": <float 0.0-1.0>
}
```
## Questions
### Why separate out its own message history?
We want the review thread to match the training of our review models as
much as possible -- that means using a custom prompt, removing user
instructions, and starting a clean chat history.
We also want to make sure the review thread doesn't leak into the parent
thread.
### Why do this as a mode, vs. sub-agents?
1. We want review to be a synchronous task, so it's fine for now to do a
bespoke implementation.
2. We're still unclear about the final structure for sub-agents. We'd
prefer to land this quickly and then refactor into sub-agents without
rushing that implementation.
This commit is contained in:
@@ -32,6 +32,7 @@ use toml::Value as TomlValue;
|
||||
use toml_edit::DocumentMut;
|
||||
|
||||
const OPENAI_DEFAULT_MODEL: &str = "gpt-5";
|
||||
const OPENAI_DEFAULT_REVIEW_MODEL: &str = "gpt-5";
|
||||
pub const GPT5_HIGH_MODEL: &str = "gpt-5-high-new";
|
||||
|
||||
/// Maximum number of bytes of the documentation that will be embedded. Larger
|
||||
@@ -47,6 +48,9 @@ pub struct Config {
|
||||
/// Optional override of model selection.
|
||||
pub model: String,
|
||||
|
||||
/// Model used specifically for review sessions. Defaults to "gpt-5".
|
||||
pub review_model: String,
|
||||
|
||||
pub model_family: ModelFamily,
|
||||
|
||||
/// Size of the context window for the model, in tokens.
|
||||
@@ -512,6 +516,8 @@ fn apply_toml_override(root: &mut TomlValue, path: &str, value: TomlValue) {
|
||||
pub struct ConfigToml {
|
||||
/// Optional override of model selection.
|
||||
pub model: Option<String>,
|
||||
/// Review model override used by the `/review` feature.
|
||||
pub review_model: Option<String>,
|
||||
|
||||
/// Provider to use from the model_providers map.
|
||||
pub model_provider: Option<String>,
|
||||
@@ -740,6 +746,7 @@ impl ConfigToml {
|
||||
#[derive(Default, Debug, Clone)]
|
||||
pub struct ConfigOverrides {
|
||||
pub model: Option<String>,
|
||||
pub review_model: Option<String>,
|
||||
pub cwd: Option<PathBuf>,
|
||||
pub approval_policy: Option<AskForApproval>,
|
||||
pub sandbox_mode: Option<SandboxMode>,
|
||||
@@ -767,6 +774,7 @@ impl Config {
|
||||
// Destructure ConfigOverrides fully to ensure all overrides are applied.
|
||||
let ConfigOverrides {
|
||||
model,
|
||||
review_model: override_review_model,
|
||||
cwd,
|
||||
approval_policy,
|
||||
sandbox_mode,
|
||||
@@ -902,8 +910,14 @@ impl Config {
|
||||
Self::get_base_instructions(experimental_instructions_path, &resolved_cwd)?;
|
||||
let base_instructions = base_instructions.or(file_base_instructions);
|
||||
|
||||
// Default review model when not set in config; allow CLI override to take precedence.
|
||||
let review_model = override_review_model
|
||||
.or(cfg.review_model)
|
||||
.unwrap_or_else(default_review_model);
|
||||
|
||||
let config = Self {
|
||||
model,
|
||||
review_model,
|
||||
model_family,
|
||||
model_context_window,
|
||||
model_max_output_tokens,
|
||||
@@ -1027,6 +1041,10 @@ fn default_model() -> String {
|
||||
OPENAI_DEFAULT_MODEL.to_string()
|
||||
}
|
||||
|
||||
fn default_review_model() -> String {
|
||||
OPENAI_DEFAULT_REVIEW_MODEL.to_string()
|
||||
}
|
||||
|
||||
/// Returns the path to the Codex configuration directory, which can be
|
||||
/// specified by the `CODEX_HOME` environment variable. If not set, defaults to
|
||||
/// `~/.codex`.
|
||||
@@ -1439,6 +1457,7 @@ model_verbosity = "high"
|
||||
assert_eq!(
|
||||
Config {
|
||||
model: "o3".to_string(),
|
||||
review_model: "gpt-5".to_string(),
|
||||
model_family: find_family_for_model("o3").expect("known model slug"),
|
||||
model_context_window: Some(200_000),
|
||||
model_max_output_tokens: Some(100_000),
|
||||
@@ -1496,6 +1515,7 @@ model_verbosity = "high"
|
||||
)?;
|
||||
let expected_gpt3_profile_config = Config {
|
||||
model: "gpt-3.5-turbo".to_string(),
|
||||
review_model: "gpt-5".to_string(),
|
||||
model_family: find_family_for_model("gpt-3.5-turbo").expect("known model slug"),
|
||||
model_context_window: Some(16_385),
|
||||
model_max_output_tokens: Some(4_096),
|
||||
@@ -1568,6 +1588,7 @@ model_verbosity = "high"
|
||||
)?;
|
||||
let expected_zdr_profile_config = Config {
|
||||
model: "o3".to_string(),
|
||||
review_model: "gpt-5".to_string(),
|
||||
model_family: find_family_for_model("o3").expect("known model slug"),
|
||||
model_context_window: Some(200_000),
|
||||
model_max_output_tokens: Some(100_000),
|
||||
@@ -1626,6 +1647,7 @@ model_verbosity = "high"
|
||||
)?;
|
||||
let expected_gpt5_profile_config = Config {
|
||||
model: "gpt-5".to_string(),
|
||||
review_model: "gpt-5".to_string(),
|
||||
model_family: find_family_for_model("gpt-5").expect("known model slug"),
|
||||
model_context_window: Some(272_000),
|
||||
model_max_output_tokens: Some(128_000),
|
||||
|
||||
Reference in New Issue
Block a user