Review Mode (Core) (#3401)

## 📝 Review Mode -- Core

This PR introduces the Core implementation for Review mode:

- New op `Op::Review { prompt: String }`: spawns a child review task with an
isolated context, a review‑specific system prompt, and the model configured
in `Config.review_model`.
- `EnteredReviewMode`: emitted when the child review session starts. Every
event from this point onward belongs to the review session.
- `ExitedReviewMode(Option<ReviewOutputEvent>)`: emitted when the review
finishes or is interrupted, carrying optional structured findings in the
shape below (a deserialization sketch follows the schema):

```json
{
  "findings": [
    {
      "title": "<≤ 80 chars, imperative>",
      "body": "<valid Markdown explaining *why* this is a problem; cite files/lines/functions>",
      "confidence_score": <float 0.0-1.0>,
      "priority": <int 0-3>,
      "code_location": {
        "absolute_file_path": "<file path>",
        "line_range": {"start": <int>, "end": <int>}
      }
    }
  ],
  "overall_correctness": "patch is correct" | "patch is incorrect",
  "overall_explanation": "<1-3 sentence explanation justifying the overall_correctness verdict>",
  "overall_confidence_score": <float 0.0-1.0>
}
```
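
For reference, a payload in this shape could be deserialized with `serde` along the following lines. This is a sketch inferred from the schema above -- the struct and field types are assumptions, not the PR's actual definitions:

```rust
use serde::Deserialize;

// Types inferred from the JSON schema above; names/types are assumptions.
#[derive(Debug, Deserialize)]
struct ReviewOutputEvent {
    findings: Vec<ReviewFinding>,
    overall_correctness: String,   // "patch is correct" | "patch is incorrect"
    overall_explanation: String,   // 1-3 sentence justification of the verdict
    overall_confidence_score: f32, // 0.0-1.0
}

#[derive(Debug, Deserialize)]
struct ReviewFinding {
    title: String,         // <= 80 chars, imperative
    body: String,          // Markdown explaining *why*, citing files/lines/functions
    confidence_score: f32, // 0.0-1.0
    priority: u8,          // 0-3
    code_location: CodeLocation,
}

#[derive(Debug, Deserialize)]
struct CodeLocation {
    absolute_file_path: String,
    line_range: LineRange,
}

#[derive(Debug, Deserialize)]
struct LineRange {
    start: u32,
    end: u32,
}

fn main() -> serde_json::Result<()> {
    let raw = r#"{
        "findings": [],
        "overall_correctness": "patch is correct",
        "overall_explanation": "No issues found.",
        "overall_confidence_score": 0.9
    }"#;
    let event: ReviewOutputEvent = serde_json::from_str(raw)?;
    println!("{} findings; verdict: {}", event.findings.len(), event.overall_correctness);
    Ok(())
}
```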

## Questions

### Why give review its own message history?

We want the review thread to match the training of our review models as
closely as possible -- that means using a custom prompt, removing user
instructions, and starting with a clean chat history.

We also want to make sure the review thread's context doesn't leak back into
the parent thread.
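
As a rough illustration of that isolation (all names here are hypothetical,
not this PR's actual types):

```rust
// Hypothetical shape of the child review thread's state; this only
// illustrates the isolation described above, not the real implementation.
struct ReviewThread {
    system_prompt: String, // review-specific prompt, not the parent's
    history: Vec<String>,  // starts empty: nothing inherited from the parent
}

fn spawn_review_thread(review_prompt: String) -> ReviewThread {
    ReviewThread {
        system_prompt: review_prompt, // custom prompt; user instructions omitted
        history: Vec::new(),          // clean chat history
    }
}

fn main() {
    let thread = spawn_review_thread("Review this diff for correctness.".into());
    println!("prompt: {}", thread.system_prompt);
    assert!(thread.history.is_empty()); // no parent messages leak in
}
```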

### Why do this as a mode, vs. sub-agents?

1. We want review to be a synchronous task, so a bespoke implementation is
fine for now.
2. We're still unsettled on the final structure for sub-agents. We'd prefer
to land this quickly and then refactor into sub-agents without rushing that
implementation.
Commit `90a0fd342f` (parent `8d56d2f655`), authored by dedrisian-oai on
2025-09-12 16:25:10 -07:00 and committed via GitHub. 15 changed files with
976 additions and 19 deletions.

The excerpt below shows the `review_model` plumbing through the config layer:

```diff
@@ -32,6 +32,7 @@ use toml::Value as TomlValue;
 use toml_edit::DocumentMut;
 const OPENAI_DEFAULT_MODEL: &str = "gpt-5";
+const OPENAI_DEFAULT_REVIEW_MODEL: &str = "gpt-5";
 pub const GPT5_HIGH_MODEL: &str = "gpt-5-high-new";
 /// Maximum number of bytes of the documentation that will be embedded. Larger
@@ -47,6 +48,9 @@ pub struct Config {
     /// Optional override of model selection.
     pub model: String,
+    /// Model used specifically for review sessions. Defaults to "gpt-5".
+    pub review_model: String,
     pub model_family: ModelFamily,
     /// Size of the context window for the model, in tokens.
@@ -512,6 +516,8 @@ fn apply_toml_override(root: &mut TomlValue, path: &str, value: TomlValue) {
 pub struct ConfigToml {
     /// Optional override of model selection.
     pub model: Option<String>,
+    /// Review model override used by the `/review` feature.
+    pub review_model: Option<String>,
     /// Provider to use from the model_providers map.
     pub model_provider: Option<String>,
@@ -740,6 +746,7 @@ impl ConfigToml {
 #[derive(Default, Debug, Clone)]
 pub struct ConfigOverrides {
     pub model: Option<String>,
+    pub review_model: Option<String>,
     pub cwd: Option<PathBuf>,
     pub approval_policy: Option<AskForApproval>,
     pub sandbox_mode: Option<SandboxMode>,
@@ -767,6 +774,7 @@ impl Config {
         // Destructure ConfigOverrides fully to ensure all overrides are applied.
         let ConfigOverrides {
             model,
+            review_model: override_review_model,
             cwd,
             approval_policy,
             sandbox_mode,
@@ -902,8 +910,14 @@
             Self::get_base_instructions(experimental_instructions_path, &resolved_cwd)?;
         let base_instructions = base_instructions.or(file_base_instructions);
+        // Default review model when not set in config; allow CLI override to take precedence.
+        let review_model = override_review_model
+            .or(cfg.review_model)
+            .unwrap_or_else(default_review_model);
         let config = Self {
             model,
+            review_model,
             model_family,
             model_context_window,
             model_max_output_tokens,
@@ -1027,6 +1041,10 @@ fn default_model() -> String {
     OPENAI_DEFAULT_MODEL.to_string()
 }
+fn default_review_model() -> String {
+    OPENAI_DEFAULT_REVIEW_MODEL.to_string()
+}
 /// Returns the path to the Codex configuration directory, which can be
 /// specified by the `CODEX_HOME` environment variable. If not set, defaults to
 /// `~/.codex`.
@@ -1439,6 +1457,7 @@ model_verbosity = "high"
         assert_eq!(
             Config {
                 model: "o3".to_string(),
+                review_model: "gpt-5".to_string(),
                 model_family: find_family_for_model("o3").expect("known model slug"),
                 model_context_window: Some(200_000),
                 model_max_output_tokens: Some(100_000),
@@ -1496,6 +1515,7 @@ model_verbosity = "high"
         )?;
         let expected_gpt3_profile_config = Config {
             model: "gpt-3.5-turbo".to_string(),
+            review_model: "gpt-5".to_string(),
             model_family: find_family_for_model("gpt-3.5-turbo").expect("known model slug"),
             model_context_window: Some(16_385),
             model_max_output_tokens: Some(4_096),
@@ -1568,6 +1588,7 @@ model_verbosity = "high"
         )?;
         let expected_zdr_profile_config = Config {
             model: "o3".to_string(),
+            review_model: "gpt-5".to_string(),
             model_family: find_family_for_model("o3").expect("known model slug"),
             model_context_window: Some(200_000),
             model_max_output_tokens: Some(100_000),
@@ -1626,6 +1647,7 @@ model_verbosity = "high"
         )?;
         let expected_gpt5_profile_config = Config {
             model: "gpt-5".to_string(),
+            review_model: "gpt-5".to_string(),
             model_family: find_family_for_model("gpt-5").expect("known model slug"),
             model_context_window: Some(272_000),
             model_max_output_tokens: Some(128_000),
```
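
The resolution order above -- CLI override, then the `config.toml` entry, then the built-in default -- can be exercised in isolation. `resolve_review_model` below is an illustrative stand-in, not a function from this PR:

```rust
// Standalone sketch of the precedence implemented in the diff above.
fn resolve_review_model(cli_override: Option<String>, config_file: Option<String>) -> String {
    cli_override
        .or(config_file) // the CLI flag wins over the config.toml entry
        .unwrap_or_else(|| "gpt-5".to_string()) // OPENAI_DEFAULT_REVIEW_MODEL fallback
}

fn main() {
    // config.toml sets review_model = "o3", no CLI override: the file value wins.
    assert_eq!(resolve_review_model(None, Some("o3".into())), "o3");
    // A CLI override takes precedence over the file value.
    assert_eq!(
        resolve_review_model(Some("gpt-5-high-new".into()), Some("o3".into())),
        "gpt-5-high-new"
    );
    // Neither set: fall back to the default.
    assert_eq!(resolve_review_model(None, None), "gpt-5");
}
```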