feat: context compaction (#3446)

## Compact feature: 1. Stop the model when the context window becomes too large. 2. Add a user turn asking the model to summarize the conversation. 3. Build a bridge, rendered from a template, that contains all the previous user messages plus the summary. 4. Start sampling again from a clean conversation that contains only that bridge.
2025-09-12 13:07:10 -07:00
parent d4848e558b
commit ea225df22e
14 changed files with 1243 additions and 326 deletions
--- a/codex-rs/core/src/openai_model_info.rs
+++ b/codex-rs/core/src/openai_model_info.rs
@@ -12,6 +12,19 @@ pub(crate) struct ModelInfo {

    /// Maximum number of output tokens that can be generated for the model.
    pub(crate) max_output_tokens: u64,
+
+    /// Token threshold where we should automatically compact conversation history.
+    pub(crate) auto_compact_token_limit: Option<i64>,
+}
+
+impl ModelInfo {
+    const fn new(context_window: u64, max_output_tokens: u64) -> Self {
+        Self {
+            context_window,
+            max_output_tokens,
+            auto_compact_token_limit: None,
+        }
+    }
 }

 pub(crate) fn get_model_info(model_family: &ModelFamily) -> Option<ModelInfo> {
@@ -20,73 +33,37 @@ pub(crate) fn get_model_info(model_family: &ModelFamily) -> Option<ModelInfo> {
        // OSS models have a 128k shared token pool.
        // Arbitrarily splitting it: 3/4 input context, 1/4 output.
        // https://openai.com/index/gpt-oss-model-card/
-        "gpt-oss-20b" => Some(ModelInfo {
-            context_window: 96_000,
-            max_output_tokens: 32_000,
-        }),
-        "gpt-oss-120b" => Some(ModelInfo {
-            context_window: 96_000,
-            max_output_tokens: 32_000,
-        }),
+        "gpt-oss-20b" => Some(ModelInfo::new(96_000, 32_000)),
+        "gpt-oss-120b" => Some(ModelInfo::new(96_000, 32_000)),
        // https://platform.openai.com/docs/models/o3
-        "o3" => Some(ModelInfo {
-            context_window: 200_000,
-            max_output_tokens: 100_000,
-        }),
+        "o3" => Some(ModelInfo::new(200_000, 100_000)),

        // https://platform.openai.com/docs/models/o4-mini
-        "o4-mini" => Some(ModelInfo {
-            context_window: 200_000,
-            max_output_tokens: 100_000,
-        }),
+        "o4-mini" => Some(ModelInfo::new(200_000, 100_000)),

        // https://platform.openai.com/docs/models/codex-mini-latest
-        "codex-mini-latest" => Some(ModelInfo {
-            context_window: 200_000,
-            max_output_tokens: 100_000,
-        }),
+        "codex-mini-latest" => Some(ModelInfo::new(200_000, 100_000)),

        // As of Jun 25, 2025, gpt-4.1 defaults to gpt-4.1-2025-04-14.
        // https://platform.openai.com/docs/models/gpt-4.1
-        "gpt-4.1" | "gpt-4.1-2025-04-14" => Some(ModelInfo {
-            context_window: 1_047_576,
-            max_output_tokens: 32_768,
-        }),
+        "gpt-4.1" | "gpt-4.1-2025-04-14" => Some(ModelInfo::new(1_047_576, 32_768)),

        // As of Jun 25, 2025, gpt-4o defaults to gpt-4o-2024-08-06.
        // https://platform.openai.com/docs/models/gpt-4o
-        "gpt-4o" | "gpt-4o-2024-08-06" => Some(ModelInfo {
-            context_window: 128_000,
-            max_output_tokens: 16_384,
-        }),
+        "gpt-4o" | "gpt-4o-2024-08-06" => Some(ModelInfo::new(128_000, 16_384)),

        // https://platform.openai.com/docs/models/gpt-4o?snapshot=gpt-4o-2024-05-13
-        "gpt-4o-2024-05-13" => Some(ModelInfo {
-            context_window: 128_000,
-            max_output_tokens: 4_096,
-        }),
+        "gpt-4o-2024-05-13" => Some(ModelInfo::new(128_000, 4_096)),

        // https://platform.openai.com/docs/models/gpt-4o?snapshot=gpt-4o-2024-11-20
-        "gpt-4o-2024-11-20" => Some(ModelInfo {
-            context_window: 128_000,
-            max_output_tokens: 16_384,
-        }),
+        "gpt-4o-2024-11-20" => Some(ModelInfo::new(128_000, 16_384)),

        // https://platform.openai.com/docs/models/gpt-3.5-turbo
-        "gpt-3.5-turbo" => Some(ModelInfo {
-            context_window: 16_385,
-            max_output_tokens: 4_096,
-        }),
+        "gpt-3.5-turbo" => Some(ModelInfo::new(16_385, 4_096)),

-        _ if slug.starts_with("gpt-5") => Some(ModelInfo {
-            context_window: 272_000,
-            max_output_tokens: 128_000,
-        }),
+        _ if slug.starts_with("gpt-5") => Some(ModelInfo::new(272_000, 128_000)),

-        _ if slug.starts_with("codex-") => Some(ModelInfo {
-            context_window: 272_000,
-            max_output_tokens: 128_000,
-        }),
+        _ if slug.starts_with("codex-") => Some(ModelInfo::new(272_000, 128_000)),

        _ => None,
    }