Review Mode (Core) (#3401)

## 📝 Review Mode -- Core

This PR introduces the Core implementation for Review mode:

- New op `Op::Review { prompt: String }`: spawns a child review task with an
isolated context, a review‑specific system prompt, and the model configured
in `Config.review_model`.
- `EnteredReviewMode`: emitted when the child review session starts. Every
event from this point onward belongs to the review session.
- `ExitedReviewMode(Option<ReviewOutputEvent>)`: emitted when the review
finishes or is interrupted, carrying optional structured findings in the
shape below (a deserialization sketch follows the schema):

```json
{
  "findings": [
    {
      "title": "<≤ 80 chars, imperative>",
      "body": "<valid Markdown explaining *why* this is a problem; cite files/lines/functions>",
      "confidence_score": <float 0.0-1.0>,
      "priority": <int 0-3>,
      "code_location": {
        "absolute_file_path": "<file path>",
        "line_range": {"start": <int>, "end": <int>}
      }
    }
  ],
  "overall_correctness": "patch is correct" | "patch is incorrect",
  "overall_explanation": "<1-3 sentence explanation justifying the overall_correctness verdict>",
  "overall_confidence_score": <float 0.0-1.0>
}
```
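
For reference, a payload in this shape could be deserialized with `serde` along the following lines. This is a sketch inferred from the schema above -- the struct and field types are assumptions, not the PR's actual definitions:

```rust
use serde::Deserialize;

// Types inferred from the JSON schema above; names/types are assumptions.
#[derive(Debug, Deserialize)]
struct ReviewOutputEvent {
    findings: Vec<ReviewFinding>,
    overall_correctness: String,   // "patch is correct" | "patch is incorrect"
    overall_explanation: String,   // 1-3 sentence justification of the verdict
    overall_confidence_score: f32, // 0.0-1.0
}

#[derive(Debug, Deserialize)]
struct ReviewFinding {
    title: String,         // <= 80 chars, imperative
    body: String,          // Markdown explaining *why*, citing files/lines/functions
    confidence_score: f32, // 0.0-1.0
    priority: u8,          // 0-3
    code_location: CodeLocation,
}

#[derive(Debug, Deserialize)]
struct CodeLocation {
    absolute_file_path: String,
    line_range: LineRange,
}

#[derive(Debug, Deserialize)]
struct LineRange {
    start: u32,
    end: u32,
}

fn main() -> serde_json::Result<()> {
    let raw = r#"{
        "findings": [],
        "overall_correctness": "patch is correct",
        "overall_explanation": "No issues found.",
        "overall_confidence_score": 0.9
    }"#;
    let event: ReviewOutputEvent = serde_json::from_str(raw)?;
    println!("{} findings; verdict: {}", event.findings.len(), event.overall_correctness);
    Ok(())
}
```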

## Questions

### Why give review its own message history?

We want the review thread to match the training of our review models as
closely as possible -- that means using a custom prompt, removing user
instructions, and starting with a clean chat history.

We also want to make sure the review thread's context doesn't leak back into
the parent thread.
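
As a rough illustration of that isolation (all names here are hypothetical,
not this PR's actual types):

```rust
// Hypothetical shape of the child review thread's state; this only
// illustrates the isolation described above, not the real implementation.
struct ReviewThread {
    system_prompt: String, // review-specific prompt, not the parent's
    history: Vec<String>,  // starts empty: nothing inherited from the parent
}

fn spawn_review_thread(review_prompt: String) -> ReviewThread {
    ReviewThread {
        system_prompt: review_prompt, // custom prompt; user instructions omitted
        history: Vec::new(),          // clean chat history
    }
}

fn main() {
    let thread = spawn_review_thread("Review this diff for correctness.".into());
    println!("prompt: {}", thread.system_prompt);
    assert!(thread.history.is_empty()); // no parent messages leak in
}
```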

### Why do this as a mode, vs. sub-agents?

1. We want review to be a synchronous task, so a bespoke implementation is
fine for now.
2. We're still unsettled on the final structure for sub-agents. We'd prefer
to land this quickly and then refactor into sub-agents without rushing that
implementation.
Commit `90a0fd342f` (parent `8d56d2f655`), authored by dedrisian-oai on
2025-09-12 16:25:10 -07:00 and committed via GitHub. 15 changed files with
976 additions and 19 deletions.

The excerpt below shows the `review_model` plumbing through the config layer:

```diff
@@ -32,6 +32,7 @@ use toml::Value as TomlValue;
 use toml_edit::DocumentMut;
 const OPENAI_DEFAULT_MODEL: &str = "gpt-5";
+const OPENAI_DEFAULT_REVIEW_MODEL: &str = "gpt-5";
 pub const GPT5_HIGH_MODEL: &str = "gpt-5-high-new";
 /// Maximum number of bytes of the documentation that will be embedded. Larger
@@ -47,6 +48,9 @@ pub struct Config {
     /// Optional override of model selection.
     pub model: String,
+    /// Model used specifically for review sessions. Defaults to "gpt-5".
+    pub review_model: String,
     pub model_family: ModelFamily,
     /// Size of the context window for the model, in tokens.
@@ -512,6 +516,8 @@ fn apply_toml_override(root: &mut TomlValue, path: &str, value: TomlValue) {
 pub struct ConfigToml {
     /// Optional override of model selection.
     pub model: Option<String>,
+    /// Review model override used by the `/review` feature.
+    pub review_model: Option<String>,
     /// Provider to use from the model_providers map.
     pub model_provider: Option<String>,
@@ -740,6 +746,7 @@ impl ConfigToml {
 #[derive(Default, Debug, Clone)]
 pub struct ConfigOverrides {
     pub model: Option<String>,
+    pub review_model: Option<String>,
     pub cwd: Option<PathBuf>,
     pub approval_policy: Option<AskForApproval>,
     pub sandbox_mode: Option<SandboxMode>,
@@ -767,6 +774,7 @@ impl Config {
         // Destructure ConfigOverrides fully to ensure all overrides are applied.
         let ConfigOverrides {
             model,
+            review_model: override_review_model,
             cwd,
             approval_policy,
             sandbox_mode,
@@ -902,8 +910,14 @@
             Self::get_base_instructions(experimental_instructions_path, &resolved_cwd)?;
         let base_instructions = base_instructions.or(file_base_instructions);
+        // Default review model when not set in config; allow CLI override to take precedence.
+        let review_model = override_review_model
+            .or(cfg.review_model)
+            .unwrap_or_else(default_review_model);
         let config = Self {
             model,
+            review_model,
             model_family,
             model_context_window,
             model_max_output_tokens,
@@ -1027,6 +1041,10 @@ fn default_model() -> String {
     OPENAI_DEFAULT_MODEL.to_string()
 }
+fn default_review_model() -> String {
+    OPENAI_DEFAULT_REVIEW_MODEL.to_string()
+}
 /// Returns the path to the Codex configuration directory, which can be
 /// specified by the `CODEX_HOME` environment variable. If not set, defaults to
 /// `~/.codex`.
@@ -1439,6 +1457,7 @@ model_verbosity = "high"
         assert_eq!(
             Config {
                 model: "o3".to_string(),
+                review_model: "gpt-5".to_string(),
                 model_family: find_family_for_model("o3").expect("known model slug"),
                 model_context_window: Some(200_000),
                 model_max_output_tokens: Some(100_000),
@@ -1496,6 +1515,7 @@ model_verbosity = "high"
         )?;
         let expected_gpt3_profile_config = Config {
             model: "gpt-3.5-turbo".to_string(),
+            review_model: "gpt-5".to_string(),
             model_family: find_family_for_model("gpt-3.5-turbo").expect("known model slug"),
             model_context_window: Some(16_385),
             model_max_output_tokens: Some(4_096),
@@ -1568,6 +1588,7 @@ model_verbosity = "high"
         )?;
         let expected_zdr_profile_config = Config {
             model: "o3".to_string(),
+            review_model: "gpt-5".to_string(),
             model_family: find_family_for_model("o3").expect("known model slug"),
             model_context_window: Some(200_000),
             model_max_output_tokens: Some(100_000),
@@ -1626,6 +1647,7 @@ model_verbosity = "high"
         )?;
         let expected_gpt5_profile_config = Config {
             model: "gpt-5".to_string(),
+            review_model: "gpt-5".to_string(),
             model_family: find_family_for_model("gpt-5").expect("known model slug"),
             model_context_window: Some(272_000),
             model_max_output_tokens: Some(128_000),
```
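
The resolution order above -- CLI override, then the `config.toml` entry, then the built-in default -- can be exercised in isolation. `resolve_review_model` below is an illustrative stand-in, not a function from this PR:

```rust
// Standalone sketch of the precedence implemented in the diff above.
fn resolve_review_model(cli_override: Option<String>, config_file: Option<String>) -> String {
    cli_override
        .or(config_file) // the CLI flag wins over the config.toml entry
        .unwrap_or_else(|| "gpt-5".to_string()) // OPENAI_DEFAULT_REVIEW_MODEL fallback
}

fn main() {
    // config.toml sets review_model = "o3", no CLI override: the file value wins.
    assert_eq!(resolve_review_model(None, Some("o3".into())), "o3");
    // A CLI override takes precedence over the file value.
    assert_eq!(
        resolve_review_model(Some("gpt-5-high-new".into()), Some("o3".into())),
        "gpt-5-high-new"
    );
    // Neither set: fall back to the default.
    assert_eq!(resolve_review_model(None, None), "gpt-5");
}
```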