feat: context compaction (#3446)

## Compact feature: 1. Stops the model when the context window becomes too large 2. Adds a user turn asking the model to summarize 3. Builds a bridge, rendered from a template, that contains all the previous user messages plus the summary 4. Starts sampling again from a clean conversation containing only that bridge
2025-09-12 13:07:10 -07:00
parent d4848e558b
commit ea225df22e
14 changed files with 1243 additions and 326 deletions
--- a/codex-rs/core/src/config.rs
+++ b/codex-rs/core/src/config.rs
@@ -55,6 +55,9 @@ pub struct Config {
    /// Maximum number of output tokens.
    pub model_max_output_tokens: Option<u64>,

+    /// Token usage threshold triggering auto-compaction of conversation history.
+    pub model_auto_compact_token_limit: Option<i64>,
+
    /// Key into the model_providers map that specifies which provider to use.
    pub model_provider_id: String,

@@ -519,6 +522,9 @@ pub struct ConfigToml {
    /// Maximum number of output tokens.
    pub model_max_output_tokens: Option<u64>,

+    /// Token usage threshold triggering auto-compaction of conversation history.
+    pub model_auto_compact_token_limit: Option<i64>,
+
    /// Default approval policy for executing commands.
    pub approval_policy: Option<AskForApproval>,

@@ -877,6 +883,11 @@ impl Config {
                .as_ref()
                .map(|info| info.max_output_tokens)
        });
+        let model_auto_compact_token_limit = cfg.model_auto_compact_token_limit.or_else(|| {
+            openai_model_info
+                .as_ref()
+                .and_then(|info| info.auto_compact_token_limit)
+        });

        let experimental_resume = cfg.experimental_resume;

@@ -896,6 +907,7 @@ impl Config {
            model_family,
            model_context_window,
            model_max_output_tokens,
+            model_auto_compact_token_limit,
            model_provider_id,
            model_provider,
            cwd: resolved_cwd,
@@ -1430,6 +1442,7 @@ model_verbosity = "high"
                model_family: find_family_for_model("o3").expect("known model slug"),
                model_context_window: Some(200_000),
                model_max_output_tokens: Some(100_000),
+                model_auto_compact_token_limit: None,
                model_provider_id: "openai".to_string(),
                model_provider: fixture.openai_provider.clone(),
                approval_policy: AskForApproval::Never,
@@ -1486,6 +1499,7 @@ model_verbosity = "high"
            model_family: find_family_for_model("gpt-3.5-turbo").expect("known model slug"),
            model_context_window: Some(16_385),
            model_max_output_tokens: Some(4_096),
+            model_auto_compact_token_limit: None,
            model_provider_id: "openai-chat-completions".to_string(),
            model_provider: fixture.openai_chat_completions_provider.clone(),
            approval_policy: AskForApproval::UnlessTrusted,
@@ -1557,6 +1571,7 @@ model_verbosity = "high"
            model_family: find_family_for_model("o3").expect("known model slug"),
            model_context_window: Some(200_000),
            model_max_output_tokens: Some(100_000),
+            model_auto_compact_token_limit: None,
            model_provider_id: "openai".to_string(),
            model_provider: fixture.openai_provider.clone(),
            approval_policy: AskForApproval::OnFailure,
@@ -1614,6 +1629,7 @@ model_verbosity = "high"
            model_family: find_family_for_model("gpt-5").expect("known model slug"),
            model_context_window: Some(272_000),
            model_max_output_tokens: Some(128_000),
+            model_auto_compact_token_limit: None,
            model_provider_id: "openai".to_string(),
            model_provider: fixture.openai_provider.clone(),
            approval_policy: AskForApproval::OnFailure,