Auto compact at ~90% (#5292)
Users currently hit a "context window exceeded" error and usually don't know what to do next. This change starts auto-compaction at ~90% of the context window.
This commit is contained in:
@@ -11,6 +11,7 @@ In the codex-rs folder where the rust code lives:
|
|||||||
- Always collapse if statements per https://rust-lang.github.io/rust-clippy/master/index.html#collapsible_if
|
- Always collapse if statements per https://rust-lang.github.io/rust-clippy/master/index.html#collapsible_if
|
||||||
- Always inline format! args when possible per https://rust-lang.github.io/rust-clippy/master/index.html#uninlined_format_args
|
- Always inline format! args when possible per https://rust-lang.github.io/rust-clippy/master/index.html#uninlined_format_args
|
||||||
- Use method references over closures when possible per https://rust-lang.github.io/rust-clippy/master/index.html#redundant_closure_for_method_calls
|
- Use method references over closures when possible per https://rust-lang.github.io/rust-clippy/master/index.html#redundant_closure_for_method_calls
|
||||||
|
- Do not use unsigned integers, even if the number cannot be negative.
|
||||||
- When writing tests, prefer comparing the equality of entire objects over fields one by one.
|
- When writing tests, prefer comparing the equality of entire objects over fields one by one.
|
||||||
- When making a change that adds or changes an API, ensure that the documentation in the `docs/` folder is up to date if applicable.
|
- When making a change that adds or changes an API, ensure that the documentation in the `docs/` folder is up to date if applicable.
|
||||||
|
|
||||||
|
|||||||
@@ -112,10 +112,12 @@ impl ModelClient {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_model_context_window(&self) -> Option<u64> {
|
pub fn get_model_context_window(&self) -> Option<i64> {
|
||||||
|
let pct = self.config.model_family.effective_context_window_percent;
|
||||||
self.config
|
self.config
|
||||||
.model_context_window
|
.model_context_window
|
||||||
.or_else(|| get_model_info(&self.config.model_family).map(|info| info.context_window))
|
.or_else(|| get_model_info(&self.config.model_family).map(|info| info.context_window))
|
||||||
|
.map(|w| w.saturating_mul(pct) / 100)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_auto_compact_token_limit(&self) -> Option<i64> {
|
pub fn get_auto_compact_token_limit(&self) -> Option<i64> {
|
||||||
@@ -544,11 +546,11 @@ struct ResponseCompleted {
|
|||||||
|
|
||||||
#[derive(Debug, Deserialize)]
|
#[derive(Debug, Deserialize)]
|
||||||
struct ResponseCompletedUsage {
|
struct ResponseCompletedUsage {
|
||||||
input_tokens: u64,
|
input_tokens: i64,
|
||||||
input_tokens_details: Option<ResponseCompletedInputTokensDetails>,
|
input_tokens_details: Option<ResponseCompletedInputTokensDetails>,
|
||||||
output_tokens: u64,
|
output_tokens: i64,
|
||||||
output_tokens_details: Option<ResponseCompletedOutputTokensDetails>,
|
output_tokens_details: Option<ResponseCompletedOutputTokensDetails>,
|
||||||
total_tokens: u64,
|
total_tokens: i64,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<ResponseCompletedUsage> for TokenUsage {
|
impl From<ResponseCompletedUsage> for TokenUsage {
|
||||||
@@ -571,12 +573,12 @@ impl From<ResponseCompletedUsage> for TokenUsage {
|
|||||||
|
|
||||||
#[derive(Debug, Deserialize)]
|
#[derive(Debug, Deserialize)]
|
||||||
struct ResponseCompletedInputTokensDetails {
|
struct ResponseCompletedInputTokensDetails {
|
||||||
cached_tokens: u64,
|
cached_tokens: i64,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Deserialize)]
|
#[derive(Debug, Deserialize)]
|
||||||
struct ResponseCompletedOutputTokensDetails {
|
struct ResponseCompletedOutputTokensDetails {
|
||||||
reasoning_tokens: u64,
|
reasoning_tokens: i64,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn attach_item_ids(payload_json: &mut Value, original_items: &[ResponseItem]) {
|
fn attach_item_ids(payload_json: &mut Value, original_items: &[ResponseItem]) {
|
||||||
@@ -633,7 +635,7 @@ fn parse_rate_limit_window(
|
|||||||
let used_percent: Option<f64> = parse_header_f64(headers, used_percent_header);
|
let used_percent: Option<f64> = parse_header_f64(headers, used_percent_header);
|
||||||
|
|
||||||
used_percent.and_then(|used_percent| {
|
used_percent.and_then(|used_percent| {
|
||||||
let window_minutes = parse_header_u64(headers, window_minutes_header);
|
let window_minutes = parse_header_i64(headers, window_minutes_header);
|
||||||
let resets_at = parse_header_str(headers, resets_header)
|
let resets_at = parse_header_str(headers, resets_header)
|
||||||
.map(str::trim)
|
.map(str::trim)
|
||||||
.filter(|value| !value.is_empty())
|
.filter(|value| !value.is_empty())
|
||||||
@@ -658,8 +660,8 @@ fn parse_header_f64(headers: &HeaderMap, name: &str) -> Option<f64> {
|
|||||||
.filter(|v| v.is_finite())
|
.filter(|v| v.is_finite())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_header_u64(headers: &HeaderMap, name: &str) -> Option<u64> {
|
fn parse_header_i64(headers: &HeaderMap, name: &str) -> Option<i64> {
|
||||||
parse_header_str(headers, name)?.parse::<u64>().ok()
|
parse_header_str(headers, name)?.parse::<i64>().ok()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_header_str<'a>(headers: &'a HeaderMap, name: &str) -> Option<&'a str> {
|
fn parse_header_str<'a>(headers: &'a HeaderMap, name: &str) -> Option<&'a str> {
|
||||||
|
|||||||
@@ -1778,7 +1778,7 @@ pub(crate) async fn run_task(
|
|||||||
.as_ref()
|
.as_ref()
|
||||||
.map(TokenUsage::tokens_in_context_window);
|
.map(TokenUsage::tokens_in_context_window);
|
||||||
let token_limit_reached = total_usage_tokens
|
let token_limit_reached = total_usage_tokens
|
||||||
.map(|tokens| (tokens as i64) >= limit)
|
.map(|tokens| tokens >= limit)
|
||||||
.unwrap_or(false);
|
.unwrap_or(false);
|
||||||
let mut items_to_record_in_conversation_history = Vec::<ResponseItem>::new();
|
let mut items_to_record_in_conversation_history = Vec::<ResponseItem>::new();
|
||||||
let mut responses = Vec::<ResponseInputItem>::new();
|
let mut responses = Vec::<ResponseInputItem>::new();
|
||||||
|
|||||||
@@ -85,10 +85,10 @@ pub struct Config {
|
|||||||
pub model_family: ModelFamily,
|
pub model_family: ModelFamily,
|
||||||
|
|
||||||
/// Size of the context window for the model, in tokens.
|
/// Size of the context window for the model, in tokens.
|
||||||
pub model_context_window: Option<u64>,
|
pub model_context_window: Option<i64>,
|
||||||
|
|
||||||
/// Maximum number of output tokens.
|
/// Maximum number of output tokens.
|
||||||
pub model_max_output_tokens: Option<u64>,
|
pub model_max_output_tokens: Option<i64>,
|
||||||
|
|
||||||
/// Token usage threshold triggering auto-compaction of conversation history.
|
/// Token usage threshold triggering auto-compaction of conversation history.
|
||||||
pub model_auto_compact_token_limit: Option<i64>,
|
pub model_auto_compact_token_limit: Option<i64>,
|
||||||
@@ -824,10 +824,10 @@ pub struct ConfigToml {
|
|||||||
pub model_provider: Option<String>,
|
pub model_provider: Option<String>,
|
||||||
|
|
||||||
/// Size of the context window for the model, in tokens.
|
/// Size of the context window for the model, in tokens.
|
||||||
pub model_context_window: Option<u64>,
|
pub model_context_window: Option<i64>,
|
||||||
|
|
||||||
/// Maximum number of output tokens.
|
/// Maximum number of output tokens.
|
||||||
pub model_max_output_tokens: Option<u64>,
|
pub model_max_output_tokens: Option<i64>,
|
||||||
|
|
||||||
/// Token usage threshold triggering auto-compaction of conversation history.
|
/// Token usage threshold triggering auto-compaction of conversation history.
|
||||||
pub model_auto_compact_token_limit: Option<i64>,
|
pub model_auto_compact_token_limit: Option<i64>,
|
||||||
@@ -2805,7 +2805,7 @@ model_verbosity = "high"
|
|||||||
model_family: find_family_for_model("o3").expect("known model slug"),
|
model_family: find_family_for_model("o3").expect("known model slug"),
|
||||||
model_context_window: Some(200_000),
|
model_context_window: Some(200_000),
|
||||||
model_max_output_tokens: Some(100_000),
|
model_max_output_tokens: Some(100_000),
|
||||||
model_auto_compact_token_limit: None,
|
model_auto_compact_token_limit: Some(180_000),
|
||||||
model_provider_id: "openai".to_string(),
|
model_provider_id: "openai".to_string(),
|
||||||
model_provider: fixture.openai_provider.clone(),
|
model_provider: fixture.openai_provider.clone(),
|
||||||
approval_policy: AskForApproval::Never,
|
approval_policy: AskForApproval::Never,
|
||||||
@@ -2874,7 +2874,7 @@ model_verbosity = "high"
|
|||||||
model_family: find_family_for_model("gpt-3.5-turbo").expect("known model slug"),
|
model_family: find_family_for_model("gpt-3.5-turbo").expect("known model slug"),
|
||||||
model_context_window: Some(16_385),
|
model_context_window: Some(16_385),
|
||||||
model_max_output_tokens: Some(4_096),
|
model_max_output_tokens: Some(4_096),
|
||||||
model_auto_compact_token_limit: None,
|
model_auto_compact_token_limit: Some(14_746),
|
||||||
model_provider_id: "openai-chat-completions".to_string(),
|
model_provider_id: "openai-chat-completions".to_string(),
|
||||||
model_provider: fixture.openai_chat_completions_provider.clone(),
|
model_provider: fixture.openai_chat_completions_provider.clone(),
|
||||||
approval_policy: AskForApproval::UnlessTrusted,
|
approval_policy: AskForApproval::UnlessTrusted,
|
||||||
@@ -2958,7 +2958,7 @@ model_verbosity = "high"
|
|||||||
model_family: find_family_for_model("o3").expect("known model slug"),
|
model_family: find_family_for_model("o3").expect("known model slug"),
|
||||||
model_context_window: Some(200_000),
|
model_context_window: Some(200_000),
|
||||||
model_max_output_tokens: Some(100_000),
|
model_max_output_tokens: Some(100_000),
|
||||||
model_auto_compact_token_limit: None,
|
model_auto_compact_token_limit: Some(180_000),
|
||||||
model_provider_id: "openai".to_string(),
|
model_provider_id: "openai".to_string(),
|
||||||
model_provider: fixture.openai_provider.clone(),
|
model_provider: fixture.openai_provider.clone(),
|
||||||
approval_policy: AskForApproval::OnFailure,
|
approval_policy: AskForApproval::OnFailure,
|
||||||
@@ -3028,7 +3028,7 @@ model_verbosity = "high"
|
|||||||
model_family: find_family_for_model("gpt-5").expect("known model slug"),
|
model_family: find_family_for_model("gpt-5").expect("known model slug"),
|
||||||
model_context_window: Some(272_000),
|
model_context_window: Some(272_000),
|
||||||
model_max_output_tokens: Some(128_000),
|
model_max_output_tokens: Some(128_000),
|
||||||
model_auto_compact_token_limit: None,
|
model_auto_compact_token_limit: Some(244_800),
|
||||||
model_provider_id: "openai".to_string(),
|
model_provider_id: "openai".to_string(),
|
||||||
model_provider: fixture.openai_provider.clone(),
|
model_provider: fixture.openai_provider.clone(),
|
||||||
approval_policy: AskForApproval::OnFailure,
|
approval_policy: AskForApproval::OnFailure,
|
||||||
|
|||||||
@@ -48,6 +48,12 @@ pub struct ModelFamily {
|
|||||||
|
|
||||||
/// Names of beta tools that should be exposed to this model family.
|
/// Names of beta tools that should be exposed to this model family.
|
||||||
pub experimental_supported_tools: Vec<String>,
|
pub experimental_supported_tools: Vec<String>,
|
||||||
|
|
||||||
|
/// Percentage of the context window considered usable for inputs, after
|
||||||
|
/// reserving headroom for system prompts, tool overhead, and model output.
|
||||||
|
/// This is applied when computing the effective context window seen by
|
||||||
|
/// consumers.
|
||||||
|
pub effective_context_window_percent: i64,
|
||||||
}
|
}
|
||||||
|
|
||||||
macro_rules! model_family {
|
macro_rules! model_family {
|
||||||
@@ -66,6 +72,7 @@ macro_rules! model_family {
|
|||||||
apply_patch_tool_type: None,
|
apply_patch_tool_type: None,
|
||||||
base_instructions: BASE_INSTRUCTIONS.to_string(),
|
base_instructions: BASE_INSTRUCTIONS.to_string(),
|
||||||
experimental_supported_tools: Vec::new(),
|
experimental_supported_tools: Vec::new(),
|
||||||
|
effective_context_window_percent: 95,
|
||||||
};
|
};
|
||||||
// apply overrides
|
// apply overrides
|
||||||
$(
|
$(
|
||||||
@@ -175,5 +182,6 @@ pub fn derive_default_model_family(model: &str) -> ModelFamily {
|
|||||||
apply_patch_tool_type: None,
|
apply_patch_tool_type: None,
|
||||||
base_instructions: BASE_INSTRUCTIONS.to_string(),
|
base_instructions: BASE_INSTRUCTIONS.to_string(),
|
||||||
experimental_supported_tools: Vec::new(),
|
experimental_supported_tools: Vec::new(),
|
||||||
|
effective_context_window_percent: 95,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,9 @@
|
|||||||
use crate::model_family::ModelFamily;
|
use crate::model_family::ModelFamily;
|
||||||
|
|
||||||
|
// Shared constants for commonly used window/token sizes.
|
||||||
|
pub(crate) const CONTEXT_WINDOW_272K: i64 = 272_000;
|
||||||
|
pub(crate) const MAX_OUTPUT_TOKENS_128K: i64 = 128_000;
|
||||||
|
|
||||||
/// Metadata about a model, particularly OpenAI models.
|
/// Metadata about a model, particularly OpenAI models.
|
||||||
/// We may want to consider including details like the pricing for
|
/// We may want to consider including details like the pricing for
|
||||||
/// input tokens, output tokens, etc., though users will need to be able to
|
/// input tokens, output tokens, etc., though users will need to be able to
|
||||||
@@ -8,10 +12,10 @@ use crate::model_family::ModelFamily;
|
|||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub(crate) struct ModelInfo {
|
pub(crate) struct ModelInfo {
|
||||||
/// Size of the context window in tokens. This is the maximum size of the input context.
|
/// Size of the context window in tokens. This is the maximum size of the input context.
|
||||||
pub(crate) context_window: u64,
|
pub(crate) context_window: i64,
|
||||||
|
|
||||||
/// Maximum number of output tokens that can be generated for the model.
|
/// Maximum number of output tokens that can be generated for the model.
|
||||||
pub(crate) max_output_tokens: u64,
|
pub(crate) max_output_tokens: i64,
|
||||||
|
|
||||||
/// Token threshold where we should automatically compact conversation history. This considers
|
/// Token threshold where we should automatically compact conversation history. This considers
|
||||||
/// input tokens + output tokens of this turn.
|
/// input tokens + output tokens of this turn.
|
||||||
@@ -19,13 +23,17 @@ pub(crate) struct ModelInfo {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl ModelInfo {
|
impl ModelInfo {
|
||||||
const fn new(context_window: u64, max_output_tokens: u64) -> Self {
|
const fn new(context_window: i64, max_output_tokens: i64) -> Self {
|
||||||
Self {
|
Self {
|
||||||
context_window,
|
context_window,
|
||||||
max_output_tokens,
|
max_output_tokens,
|
||||||
auto_compact_token_limit: None,
|
auto_compact_token_limit: Some(Self::default_auto_compact_limit(context_window)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const fn default_auto_compact_limit(context_window: i64) -> i64 {
|
||||||
|
(context_window * 9) / 10
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn get_model_info(model_family: &ModelFamily) -> Option<ModelInfo> {
|
pub(crate) fn get_model_info(model_family: &ModelFamily) -> Option<ModelInfo> {
|
||||||
@@ -62,15 +70,17 @@ pub(crate) fn get_model_info(model_family: &ModelFamily) -> Option<ModelInfo> {
|
|||||||
// https://platform.openai.com/docs/models/gpt-3.5-turbo
|
// https://platform.openai.com/docs/models/gpt-3.5-turbo
|
||||||
"gpt-3.5-turbo" => Some(ModelInfo::new(16_385, 4_096)),
|
"gpt-3.5-turbo" => Some(ModelInfo::new(16_385, 4_096)),
|
||||||
|
|
||||||
_ if slug.starts_with("gpt-5-codex") => Some(ModelInfo {
|
_ if slug.starts_with("gpt-5-codex") => {
|
||||||
context_window: 272_000,
|
Some(ModelInfo::new(CONTEXT_WINDOW_272K, MAX_OUTPUT_TOKENS_128K))
|
||||||
max_output_tokens: 128_000,
|
}
|
||||||
auto_compact_token_limit: Some(350_000),
|
|
||||||
}),
|
|
||||||
|
|
||||||
_ if slug.starts_with("gpt-5") => Some(ModelInfo::new(272_000, 128_000)),
|
_ if slug.starts_with("gpt-5") => {
|
||||||
|
Some(ModelInfo::new(CONTEXT_WINDOW_272K, MAX_OUTPUT_TOKENS_128K))
|
||||||
|
}
|
||||||
|
|
||||||
_ if slug.starts_with("codex-") => Some(ModelInfo::new(272_000, 128_000)),
|
_ if slug.starts_with("codex-") => {
|
||||||
|
Some(ModelInfo::new(CONTEXT_WINDOW_272K, MAX_OUTPUT_TOKENS_128K))
|
||||||
|
}
|
||||||
|
|
||||||
_ => None,
|
_ => None,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -48,7 +48,7 @@ impl SessionState {
|
|||||||
pub(crate) fn update_token_info_from_usage(
|
pub(crate) fn update_token_info_from_usage(
|
||||||
&mut self,
|
&mut self,
|
||||||
usage: &TokenUsage,
|
usage: &TokenUsage,
|
||||||
model_context_window: Option<u64>,
|
model_context_window: Option<i64>,
|
||||||
) {
|
) {
|
||||||
self.token_info = TokenUsageInfo::new_or_append(
|
self.token_info = TokenUsageInfo::new_or_append(
|
||||||
&self.token_info,
|
&self.token_info,
|
||||||
@@ -67,7 +67,7 @@ impl SessionState {
|
|||||||
(self.token_info.clone(), self.latest_rate_limits.clone())
|
(self.token_info.clone(), self.latest_rate_limits.clone())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn set_token_usage_full(&mut self, context_window: u64) {
|
pub(crate) fn set_token_usage_full(&mut self, context_window: i64) {
|
||||||
match &mut self.token_info {
|
match &mut self.token_info {
|
||||||
Some(info) => info.fill_to_context_window(context_window),
|
Some(info) => info.fill_to_context_window(context_window),
|
||||||
None => {
|
None => {
|
||||||
|
|||||||
@@ -138,7 +138,7 @@ pub fn ev_response_created(id: &str) -> Value {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn ev_completed_with_tokens(id: &str, total_tokens: u64) -> Value {
|
pub fn ev_completed_with_tokens(id: &str, total_tokens: i64) -> Value {
|
||||||
serde_json::json!({
|
serde_json::json!({
|
||||||
"type": "response.completed",
|
"type": "response.completed",
|
||||||
"response": {
|
"response": {
|
||||||
|
|||||||
@@ -858,8 +858,8 @@ async fn token_count_includes_rate_limits_snapshot() {
|
|||||||
"reasoning_output_tokens": 0,
|
"reasoning_output_tokens": 0,
|
||||||
"total_tokens": 123
|
"total_tokens": 123
|
||||||
},
|
},
|
||||||
// Default model is gpt-5-codex in tests → 272000 context window
|
// Default model is gpt-5-codex in tests → 95% usable context window
|
||||||
"model_context_window": 272000
|
"model_context_window": 258400
|
||||||
},
|
},
|
||||||
"rate_limits": {
|
"rate_limits": {
|
||||||
"primary": {
|
"primary": {
|
||||||
@@ -985,6 +985,8 @@ async fn context_window_error_sets_total_tokens_to_model_window() -> anyhow::Res
|
|||||||
skip_if_no_network!(Ok(()));
|
skip_if_no_network!(Ok(()));
|
||||||
let server = MockServer::start().await;
|
let server = MockServer::start().await;
|
||||||
|
|
||||||
|
const EFFECTIVE_CONTEXT_WINDOW: i64 = (272_000 * 95) / 100;
|
||||||
|
|
||||||
responses::mount_sse_once_match(
|
responses::mount_sse_once_match(
|
||||||
&server,
|
&server,
|
||||||
body_string_contains("trigger context window"),
|
body_string_contains("trigger context window"),
|
||||||
@@ -1056,8 +1058,11 @@ async fn context_window_error_sets_total_tokens_to_model_window() -> anyhow::Res
|
|||||||
.info
|
.info
|
||||||
.expect("token usage info present when context window is exceeded");
|
.expect("token usage info present when context window is exceeded");
|
||||||
|
|
||||||
assert_eq!(info.model_context_window, Some(272_000));
|
assert_eq!(info.model_context_window, Some(EFFECTIVE_CONTEXT_WINDOW));
|
||||||
assert_eq!(info.total_token_usage.total_tokens, 272_000);
|
assert_eq!(
|
||||||
|
info.total_token_usage.total_tokens,
|
||||||
|
EFFECTIVE_CONTEXT_WINDOW
|
||||||
|
);
|
||||||
|
|
||||||
let error_event = wait_for_event(&codex, |ev| matches!(ev, EventMsg::Error(_))).await;
|
let error_event = wait_for_event(&codex, |ev| matches!(ev, EventMsg::Error(_))).await;
|
||||||
let expected_context_window_message = CodexErr::ContextWindowExceeded.to_string();
|
let expected_context_window_message = CodexErr::ContextWindowExceeded.to_string();
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ use core_test_support::responses::ev_assistant_message;
|
|||||||
use core_test_support::responses::ev_completed;
|
use core_test_support::responses::ev_completed;
|
||||||
use core_test_support::responses::ev_completed_with_tokens;
|
use core_test_support::responses::ev_completed_with_tokens;
|
||||||
use core_test_support::responses::ev_function_call;
|
use core_test_support::responses::ev_function_call;
|
||||||
|
use core_test_support::responses::mount_sse_once;
|
||||||
use core_test_support::responses::mount_sse_once_match;
|
use core_test_support::responses::mount_sse_once_match;
|
||||||
use core_test_support::responses::mount_sse_sequence;
|
use core_test_support::responses::mount_sse_sequence;
|
||||||
use core_test_support::responses::sse;
|
use core_test_support::responses::sse;
|
||||||
@@ -43,6 +44,7 @@ const CONTEXT_LIMIT_MESSAGE: &str =
|
|||||||
"Your input exceeds the context window of this model. Please adjust your input and try again.";
|
"Your input exceeds the context window of this model. Please adjust your input and try again.";
|
||||||
const DUMMY_FUNCTION_NAME: &str = "unsupported_tool";
|
const DUMMY_FUNCTION_NAME: &str = "unsupported_tool";
|
||||||
const DUMMY_CALL_ID: &str = "call-multi-auto";
|
const DUMMY_CALL_ID: &str = "call-multi-auto";
|
||||||
|
const FUNCTION_CALL_LIMIT_MSG: &str = "function call limit push";
|
||||||
|
|
||||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||||
async fn summarize_context_three_requests_and_instructions() {
|
async fn summarize_context_three_requests_and_instructions() {
|
||||||
@@ -860,3 +862,97 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
|
|||||||
"second auto compact request should include the summarization prompt"
|
"second auto compact request should include the summarization prompt"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||||
|
async fn auto_compact_triggers_after_function_call_over_95_percent_usage() {
|
||||||
|
skip_if_no_network!();
|
||||||
|
|
||||||
|
let server = start_mock_server().await;
|
||||||
|
|
||||||
|
let context_window = 100;
|
||||||
|
let limit = context_window * 90 / 100;
|
||||||
|
let over_limit_tokens = context_window * 95 / 100 + 1;
|
||||||
|
|
||||||
|
let first_turn = sse(vec![
|
||||||
|
ev_function_call(DUMMY_CALL_ID, DUMMY_FUNCTION_NAME, "{}"),
|
||||||
|
ev_completed_with_tokens("r1", 50),
|
||||||
|
]);
|
||||||
|
let function_call_follow_up = sse(vec![
|
||||||
|
ev_assistant_message("m2", FINAL_REPLY),
|
||||||
|
ev_completed_with_tokens("r2", over_limit_tokens),
|
||||||
|
]);
|
||||||
|
let auto_compact_turn = sse(vec![
|
||||||
|
ev_assistant_message("m3", AUTO_SUMMARY_TEXT),
|
||||||
|
ev_completed_with_tokens("r3", 10),
|
||||||
|
]);
|
||||||
|
let post_auto_compact_turn = sse(vec![ev_completed_with_tokens("r4", 10)]);
|
||||||
|
|
||||||
|
// Mount responses in order and keep mocks only for the ones we assert on.
|
||||||
|
let first_turn_mock = mount_sse_once(&server, first_turn).await;
|
||||||
|
let follow_up_mock = mount_sse_once(&server, function_call_follow_up).await;
|
||||||
|
let auto_compact_mock = mount_sse_once(&server, auto_compact_turn).await;
|
||||||
|
// We don't assert on the post-compact request, so no need to keep its mock.
|
||||||
|
mount_sse_once(&server, post_auto_compact_turn).await;
|
||||||
|
|
||||||
|
let model_provider = ModelProviderInfo {
|
||||||
|
base_url: Some(format!("{}/v1", server.uri())),
|
||||||
|
..built_in_model_providers()["openai"].clone()
|
||||||
|
};
|
||||||
|
|
||||||
|
let home = TempDir::new().unwrap();
|
||||||
|
let mut config = load_default_config_for_test(&home);
|
||||||
|
config.model_provider = model_provider;
|
||||||
|
config.model_context_window = Some(context_window);
|
||||||
|
config.model_auto_compact_token_limit = Some(limit);
|
||||||
|
|
||||||
|
let codex = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"))
|
||||||
|
.new_conversation(config)
|
||||||
|
.await
|
||||||
|
.unwrap()
|
||||||
|
.conversation;
|
||||||
|
|
||||||
|
codex
|
||||||
|
.submit(Op::UserInput {
|
||||||
|
items: vec![InputItem::Text {
|
||||||
|
text: FUNCTION_CALL_LIMIT_MSG.into(),
|
||||||
|
}],
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
wait_for_event(&codex, |msg| matches!(msg, EventMsg::TaskComplete(_))).await;
|
||||||
|
|
||||||
|
// Assert first request captured expected user message that triggers function call.
|
||||||
|
let first_request = first_turn_mock.single_request().input();
|
||||||
|
assert!(
|
||||||
|
first_request.iter().any(|item| {
|
||||||
|
item.get("type").and_then(|value| value.as_str()) == Some("message")
|
||||||
|
&& item
|
||||||
|
.get("content")
|
||||||
|
.and_then(|content| content.as_array())
|
||||||
|
.and_then(|entries| entries.first())
|
||||||
|
.and_then(|entry| entry.get("text"))
|
||||||
|
.and_then(|value| value.as_str())
|
||||||
|
== Some(FUNCTION_CALL_LIMIT_MSG)
|
||||||
|
}),
|
||||||
|
"first request should include the user message that triggers the function call"
|
||||||
|
);
|
||||||
|
|
||||||
|
let function_call_output = follow_up_mock
|
||||||
|
.single_request()
|
||||||
|
.function_call_output(DUMMY_CALL_ID);
|
||||||
|
let output_text = function_call_output
|
||||||
|
.get("output")
|
||||||
|
.and_then(|value| value.as_str())
|
||||||
|
.unwrap_or_default();
|
||||||
|
assert!(
|
||||||
|
output_text.contains(DUMMY_FUNCTION_NAME),
|
||||||
|
"function call output should be sent before auto compact"
|
||||||
|
);
|
||||||
|
|
||||||
|
let auto_compact_body = auto_compact_mock.single_request().body_json().to_string();
|
||||||
|
assert!(
|
||||||
|
auto_compact_body.contains("You have exceeded the maximum number of tokens"),
|
||||||
|
"auto compact request should include the summarization prompt after exceeding 95% (limit {limit})"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|||||||
@@ -57,11 +57,11 @@ pub struct TurnFailedEvent {
|
|||||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS, Default)]
|
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS, Default)]
|
||||||
pub struct Usage {
|
pub struct Usage {
|
||||||
/// The number of input tokens used during the turn.
|
/// The number of input tokens used during the turn.
|
||||||
pub input_tokens: u64,
|
pub input_tokens: i64,
|
||||||
/// The number of cached input tokens used during the turn.
|
/// The number of cached input tokens used during the turn.
|
||||||
pub cached_input_tokens: u64,
|
pub cached_input_tokens: i64,
|
||||||
/// The number of output tokens used during the turn.
|
/// The number of output tokens used during the turn.
|
||||||
pub output_tokens: u64,
|
pub output_tokens: i64,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
|
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
|
||||||
|
|||||||
@@ -86,8 +86,8 @@ impl OtelEventManager {
|
|||||||
provider_name: &str,
|
provider_name: &str,
|
||||||
reasoning_effort: Option<ReasoningEffort>,
|
reasoning_effort: Option<ReasoningEffort>,
|
||||||
reasoning_summary: ReasoningSummary,
|
reasoning_summary: ReasoningSummary,
|
||||||
context_window: Option<u64>,
|
context_window: Option<i64>,
|
||||||
max_output_tokens: Option<u64>,
|
max_output_tokens: Option<i64>,
|
||||||
auto_compact_token_limit: Option<i64>,
|
auto_compact_token_limit: Option<i64>,
|
||||||
approval_policy: AskForApproval,
|
approval_policy: AskForApproval,
|
||||||
sandbox_policy: SandboxPolicy,
|
sandbox_policy: SandboxPolicy,
|
||||||
@@ -281,11 +281,11 @@ impl OtelEventManager {
|
|||||||
|
|
||||||
pub fn sse_event_completed(
|
pub fn sse_event_completed(
|
||||||
&self,
|
&self,
|
||||||
input_token_count: u64,
|
input_token_count: i64,
|
||||||
output_token_count: u64,
|
output_token_count: i64,
|
||||||
cached_token_count: Option<u64>,
|
cached_token_count: Option<i64>,
|
||||||
reasoning_token_count: Option<u64>,
|
reasoning_token_count: Option<i64>,
|
||||||
tool_token_count: u64,
|
tool_token_count: i64,
|
||||||
) {
|
) {
|
||||||
tracing::event!(
|
tracing::event!(
|
||||||
tracing::Level::INFO,
|
tracing::Level::INFO,
|
||||||
|
|||||||
@@ -22,27 +22,28 @@ fn formatter() -> &'static DecimalFormatter {
|
|||||||
FORMATTER.get_or_init(|| make_local_formatter().unwrap_or_else(make_en_us_formatter))
|
FORMATTER.get_or_init(|| make_local_formatter().unwrap_or_else(make_en_us_formatter))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Format a u64 with locale-aware digit separators (e.g. "12345" -> "12,345"
|
/// Format an i64 with locale-aware digit separators (e.g. "12345" -> "12,345"
|
||||||
/// for en-US).
|
/// for en-US).
|
||||||
pub fn format_with_separators(n: u64) -> String {
|
pub fn format_with_separators(n: i64) -> String {
|
||||||
formatter().format(&Decimal::from(n)).to_string()
|
formatter().format(&Decimal::from(n)).to_string()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn format_si_suffix_with_formatter(n: u64, formatter: &DecimalFormatter) -> String {
|
fn format_si_suffix_with_formatter(n: i64, formatter: &DecimalFormatter) -> String {
|
||||||
|
let n = n.max(0);
|
||||||
if n < 1000 {
|
if n < 1000 {
|
||||||
return formatter.format(&Decimal::from(n)).to_string();
|
return formatter.format(&Decimal::from(n)).to_string();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Format `n / scale` with the requested number of fractional digits.
|
// Format `n / scale` with the requested number of fractional digits.
|
||||||
let format_scaled = |n: u64, scale: u64, frac_digits: u32| -> String {
|
let format_scaled = |n: i64, scale: i64, frac_digits: u32| -> String {
|
||||||
let value = n as f64 / scale as f64;
|
let value = n as f64 / scale as f64;
|
||||||
let scaled: u64 = (value * 10f64.powi(frac_digits as i32)).round() as u64;
|
let scaled: i64 = (value * 10f64.powi(frac_digits as i32)).round() as i64;
|
||||||
let mut dec = Decimal::from(scaled);
|
let mut dec = Decimal::from(scaled);
|
||||||
dec.multiply_pow10(-(frac_digits as i16));
|
dec.multiply_pow10(-(frac_digits as i16));
|
||||||
formatter.format(&dec).to_string()
|
formatter.format(&dec).to_string()
|
||||||
};
|
};
|
||||||
|
|
||||||
const UNITS: [(u64, &str); 3] = [(1_000, "K"), (1_000_000, "M"), (1_000_000_000, "G")];
|
const UNITS: [(i64, &str); 3] = [(1_000, "K"), (1_000_000, "M"), (1_000_000_000, "G")];
|
||||||
let f = n as f64;
|
let f = n as f64;
|
||||||
for &(scale, suffix) in &UNITS {
|
for &(scale, suffix) in &UNITS {
|
||||||
if (100.0 * f / scale as f64).round() < 1000.0 {
|
if (100.0 * f / scale as f64).round() < 1000.0 {
|
||||||
@@ -57,7 +58,7 @@ fn format_si_suffix_with_formatter(n: u64, formatter: &DecimalFormatter) -> Stri
|
|||||||
// Above 1000G, keep whole‑G precision.
|
// Above 1000G, keep whole‑G precision.
|
||||||
format!(
|
format!(
|
||||||
"{}G",
|
"{}G",
|
||||||
format_with_separators(((n as f64) / 1e9).round() as u64)
|
format_with_separators(((n as f64) / 1e9).round() as i64)
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -67,7 +68,7 @@ fn format_si_suffix_with_formatter(n: u64, formatter: &DecimalFormatter) -> Stri
|
|||||||
/// - 999 -> "999"
|
/// - 999 -> "999"
|
||||||
/// - 1200 -> "1.20K"
|
/// - 1200 -> "1.20K"
|
||||||
/// - 123456789 -> "123M"
|
/// - 123456789 -> "123M"
|
||||||
pub fn format_si_suffix(n: u64) -> String {
|
pub fn format_si_suffix(n: i64) -> String {
|
||||||
format_si_suffix_with_formatter(n, formatter())
|
format_si_suffix_with_formatter(n, formatter())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -78,7 +79,7 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
fn kmg() {
|
fn kmg() {
|
||||||
let formatter = make_en_us_formatter();
|
let formatter = make_en_us_formatter();
|
||||||
let fmt = |n: u64| format_si_suffix_with_formatter(n, &formatter);
|
let fmt = |n: i64| format_si_suffix_with_formatter(n, &formatter);
|
||||||
assert_eq!(fmt(0), "0");
|
assert_eq!(fmt(0), "0");
|
||||||
assert_eq!(fmt(999), "999");
|
assert_eq!(fmt(999), "999");
|
||||||
assert_eq!(fmt(1_000), "1.00K");
|
assert_eq!(fmt(1_000), "1.00K");
|
||||||
|
|||||||
@@ -545,21 +545,21 @@ pub struct TaskCompleteEvent {
|
|||||||
|
|
||||||
#[derive(Debug, Clone, Deserialize, Serialize, TS)]
|
#[derive(Debug, Clone, Deserialize, Serialize, TS)]
|
||||||
pub struct TaskStartedEvent {
|
pub struct TaskStartedEvent {
|
||||||
pub model_context_window: Option<u64>,
|
pub model_context_window: Option<i64>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Deserialize, Serialize, Default, TS)]
|
#[derive(Debug, Clone, Deserialize, Serialize, Default, TS)]
|
||||||
pub struct TokenUsage {
|
pub struct TokenUsage {
|
||||||
#[ts(type = "number")]
|
#[ts(type = "number")]
|
||||||
pub input_tokens: u64,
|
pub input_tokens: i64,
|
||||||
#[ts(type = "number")]
|
#[ts(type = "number")]
|
||||||
pub cached_input_tokens: u64,
|
pub cached_input_tokens: i64,
|
||||||
#[ts(type = "number")]
|
#[ts(type = "number")]
|
||||||
pub output_tokens: u64,
|
pub output_tokens: i64,
|
||||||
#[ts(type = "number")]
|
#[ts(type = "number")]
|
||||||
pub reasoning_output_tokens: u64,
|
pub reasoning_output_tokens: i64,
|
||||||
#[ts(type = "number")]
|
#[ts(type = "number")]
|
||||||
pub total_tokens: u64,
|
pub total_tokens: i64,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Deserialize, Serialize, TS)]
|
#[derive(Debug, Clone, Deserialize, Serialize, TS)]
|
||||||
@@ -567,14 +567,14 @@ pub struct TokenUsageInfo {
|
|||||||
pub total_token_usage: TokenUsage,
|
pub total_token_usage: TokenUsage,
|
||||||
pub last_token_usage: TokenUsage,
|
pub last_token_usage: TokenUsage,
|
||||||
#[ts(type = "number | null")]
|
#[ts(type = "number | null")]
|
||||||
pub model_context_window: Option<u64>,
|
pub model_context_window: Option<i64>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl TokenUsageInfo {
|
impl TokenUsageInfo {
|
||||||
pub fn new_or_append(
|
pub fn new_or_append(
|
||||||
info: &Option<TokenUsageInfo>,
|
info: &Option<TokenUsageInfo>,
|
||||||
last: &Option<TokenUsage>,
|
last: &Option<TokenUsage>,
|
||||||
model_context_window: Option<u64>,
|
model_context_window: Option<i64>,
|
||||||
) -> Option<Self> {
|
) -> Option<Self> {
|
||||||
if info.is_none() && last.is_none() {
|
if info.is_none() && last.is_none() {
|
||||||
return None;
|
return None;
|
||||||
@@ -599,9 +599,9 @@ impl TokenUsageInfo {
|
|||||||
self.last_token_usage = last.clone();
|
self.last_token_usage = last.clone();
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn fill_to_context_window(&mut self, context_window: u64) {
|
pub fn fill_to_context_window(&mut self, context_window: i64) {
|
||||||
let previous_total = self.total_token_usage.total_tokens;
|
let previous_total = self.total_token_usage.total_tokens;
|
||||||
let delta = context_window.saturating_sub(previous_total);
|
let delta = (context_window - previous_total).max(0);
|
||||||
|
|
||||||
self.model_context_window = Some(context_window);
|
self.model_context_window = Some(context_window);
|
||||||
self.total_token_usage = TokenUsage {
|
self.total_token_usage = TokenUsage {
|
||||||
@@ -614,7 +614,7 @@ impl TokenUsageInfo {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn full_context_window(context_window: u64) -> Self {
|
pub fn full_context_window(context_window: i64) -> Self {
|
||||||
let mut info = Self {
|
let mut info = Self {
|
||||||
total_token_usage: TokenUsage::default(),
|
total_token_usage: TokenUsage::default(),
|
||||||
last_token_usage: TokenUsage::default(),
|
last_token_usage: TokenUsage::default(),
|
||||||
@@ -643,40 +643,39 @@ pub struct RateLimitWindow {
|
|||||||
pub used_percent: f64,
|
pub used_percent: f64,
|
||||||
/// Rolling window duration, in minutes.
|
/// Rolling window duration, in minutes.
|
||||||
#[ts(type = "number | null")]
|
#[ts(type = "number | null")]
|
||||||
pub window_minutes: Option<u64>,
|
pub window_minutes: Option<i64>,
|
||||||
/// Timestamp (RFC3339) when the window resets.
|
/// Timestamp (RFC3339) when the window resets.
|
||||||
#[ts(type = "string | null")]
|
#[ts(type = "string | null")]
|
||||||
pub resets_at: Option<String>,
|
pub resets_at: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Includes prompts, tools and space to call compact.
|
// Includes prompts, tools and space to call compact.
|
||||||
const BASELINE_TOKENS: u64 = 12000;
|
const BASELINE_TOKENS: i64 = 12000;
|
||||||
|
|
||||||
impl TokenUsage {
|
impl TokenUsage {
|
||||||
pub fn is_zero(&self) -> bool {
|
pub fn is_zero(&self) -> bool {
|
||||||
self.total_tokens == 0
|
self.total_tokens == 0
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn cached_input(&self) -> u64 {
|
pub fn cached_input(&self) -> i64 {
|
||||||
self.cached_input_tokens
|
self.cached_input_tokens.max(0)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn non_cached_input(&self) -> u64 {
|
pub fn non_cached_input(&self) -> i64 {
|
||||||
self.input_tokens.saturating_sub(self.cached_input())
|
(self.input_tokens - self.cached_input()).max(0)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Primary count for display as a single absolute value: non-cached input + output.
|
/// Primary count for display as a single absolute value: non-cached input + output.
|
||||||
pub fn blended_total(&self) -> u64 {
|
pub fn blended_total(&self) -> i64 {
|
||||||
self.non_cached_input() + self.output_tokens
|
(self.non_cached_input() + self.output_tokens.max(0)).max(0)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// For estimating what % of the model's context window is used, we need to account
|
/// For estimating what % of the model's context window is used, we need to account
|
||||||
/// for reasoning output tokens from prior turns being dropped from the context window.
|
/// for reasoning output tokens from prior turns being dropped from the context window.
|
||||||
/// We approximate this here by subtracting reasoning output tokens from the total.
|
/// We approximate this here by subtracting reasoning output tokens from the total.
|
||||||
/// This will be off for the current turn and pending function calls.
|
/// This will be off for the current turn and pending function calls.
|
||||||
pub fn tokens_in_context_window(&self) -> u64 {
|
pub fn tokens_in_context_window(&self) -> i64 {
|
||||||
self.total_tokens
|
(self.total_tokens - self.reasoning_output_tokens).max(0)
|
||||||
.saturating_sub(self.reasoning_output_tokens)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Estimate the remaining user-controllable percentage of the model's context window.
|
/// Estimate the remaining user-controllable percentage of the model's context window.
|
||||||
@@ -689,17 +688,17 @@ impl TokenUsage {
|
|||||||
/// This normalizes both the numerator and denominator by subtracting the
|
/// This normalizes both the numerator and denominator by subtracting the
|
||||||
/// baseline, so immediately after the first prompt the UI shows 100% left
|
/// baseline, so immediately after the first prompt the UI shows 100% left
|
||||||
/// and trends toward 0% as the user fills the effective window.
|
/// and trends toward 0% as the user fills the effective window.
|
||||||
pub fn percent_of_context_window_remaining(&self, context_window: u64) -> u8 {
|
pub fn percent_of_context_window_remaining(&self, context_window: i64) -> i64 {
|
||||||
if context_window <= BASELINE_TOKENS {
|
if context_window <= BASELINE_TOKENS {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
let effective_window = context_window - BASELINE_TOKENS;
|
let effective_window = context_window - BASELINE_TOKENS;
|
||||||
let used = self
|
let used = (self.tokens_in_context_window() - BASELINE_TOKENS).max(0);
|
||||||
.tokens_in_context_window()
|
let remaining = (effective_window - used).max(0);
|
||||||
.saturating_sub(BASELINE_TOKENS);
|
((remaining as f64 / effective_window as f64) * 100.0)
|
||||||
let remaining = effective_window.saturating_sub(used);
|
.clamp(0.0, 100.0)
|
||||||
((remaining as f32 / effective_window as f32) * 100.0).clamp(0.0, 100.0) as u8
|
.round() as i64
|
||||||
}
|
}
|
||||||
|
|
||||||
/// In-place element-wise sum of token counts.
|
/// In-place element-wise sum of token counts.
|
||||||
|
|||||||
@@ -108,7 +108,7 @@ pub(crate) struct ChatComposer {
|
|||||||
custom_prompts: Vec<CustomPrompt>,
|
custom_prompts: Vec<CustomPrompt>,
|
||||||
footer_mode: FooterMode,
|
footer_mode: FooterMode,
|
||||||
footer_hint_override: Option<Vec<(String, String)>>,
|
footer_hint_override: Option<Vec<(String, String)>>,
|
||||||
context_window_percent: Option<u8>,
|
context_window_percent: Option<i64>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Popup state – at most one can be visible at any time.
|
/// Popup state – at most one can be visible at any time.
|
||||||
@@ -1511,7 +1511,7 @@ impl ChatComposer {
|
|||||||
self.is_task_running = running;
|
self.is_task_running = running;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn set_context_window_percent(&mut self, percent: Option<u8>) {
|
pub(crate) fn set_context_window_percent(&mut self, percent: Option<i64>) {
|
||||||
if self.context_window_percent != percent {
|
if self.context_window_percent != percent {
|
||||||
self.context_window_percent = percent;
|
self.context_window_percent = percent;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ pub(crate) struct FooterProps {
|
|||||||
pub(crate) esc_backtrack_hint: bool,
|
pub(crate) esc_backtrack_hint: bool,
|
||||||
pub(crate) use_shift_enter_hint: bool,
|
pub(crate) use_shift_enter_hint: bool,
|
||||||
pub(crate) is_task_running: bool,
|
pub(crate) is_task_running: bool,
|
||||||
pub(crate) context_window_percent: Option<u8>,
|
pub(crate) context_window_percent: Option<i64>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
|
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
|
||||||
@@ -221,8 +221,8 @@ fn build_columns(entries: Vec<Line<'static>>) -> Vec<Line<'static>> {
|
|||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn context_window_line(percent: Option<u8>) -> Line<'static> {
|
fn context_window_line(percent: Option<i64>) -> Line<'static> {
|
||||||
let percent = percent.unwrap_or(100);
|
let percent = percent.unwrap_or(100).clamp(0, 100);
|
||||||
Line::from(vec![Span::from(format!("{percent}% context left")).dim()])
|
Line::from(vec![Span::from(format!("{percent}% context left")).dim()])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -70,7 +70,7 @@ pub(crate) struct BottomPane {
|
|||||||
status: Option<StatusIndicatorWidget>,
|
status: Option<StatusIndicatorWidget>,
|
||||||
/// Queued user messages to show under the status indicator.
|
/// Queued user messages to show under the status indicator.
|
||||||
queued_user_messages: Vec<String>,
|
queued_user_messages: Vec<String>,
|
||||||
context_window_percent: Option<u8>,
|
context_window_percent: Option<i64>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) struct BottomPaneParams {
|
pub(crate) struct BottomPaneParams {
|
||||||
@@ -357,7 +357,7 @@ impl BottomPane {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn set_context_window_percent(&mut self, percent: Option<u8>) {
|
pub(crate) fn set_context_window_percent(&mut self, percent: Option<i64>) {
|
||||||
if self.context_window_percent == percent {
|
if self.context_window_percent == percent {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -142,9 +142,9 @@ impl RateLimitWarningState {
|
|||||||
fn take_warnings(
|
fn take_warnings(
|
||||||
&mut self,
|
&mut self,
|
||||||
secondary_used_percent: Option<f64>,
|
secondary_used_percent: Option<f64>,
|
||||||
secondary_window_minutes: Option<u64>,
|
secondary_window_minutes: Option<i64>,
|
||||||
primary_used_percent: Option<f64>,
|
primary_used_percent: Option<f64>,
|
||||||
primary_window_minutes: Option<u64>,
|
primary_window_minutes: Option<i64>,
|
||||||
) -> Vec<String> {
|
) -> Vec<String> {
|
||||||
let reached_secondary_cap =
|
let reached_secondary_cap =
|
||||||
matches!(secondary_used_percent, Some(percent) if percent == 100.0);
|
matches!(secondary_used_percent, Some(percent) if percent == 100.0);
|
||||||
@@ -195,12 +195,14 @@ impl RateLimitWarningState {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn get_limits_duration(windows_minutes: u64) -> String {
|
pub(crate) fn get_limits_duration(windows_minutes: i64) -> String {
|
||||||
const MINUTES_PER_HOUR: u64 = 60;
|
const MINUTES_PER_HOUR: i64 = 60;
|
||||||
const MINUTES_PER_DAY: u64 = 24 * MINUTES_PER_HOUR;
|
const MINUTES_PER_DAY: i64 = 24 * MINUTES_PER_HOUR;
|
||||||
const MINUTES_PER_WEEK: u64 = 7 * MINUTES_PER_DAY;
|
const MINUTES_PER_WEEK: i64 = 7 * MINUTES_PER_DAY;
|
||||||
const MINUTES_PER_MONTH: u64 = 30 * MINUTES_PER_DAY;
|
const MINUTES_PER_MONTH: i64 = 30 * MINUTES_PER_DAY;
|
||||||
const ROUNDING_BIAS_MINUTES: u64 = 3;
|
const ROUNDING_BIAS_MINUTES: i64 = 3;
|
||||||
|
|
||||||
|
let windows_minutes = windows_minutes.max(0);
|
||||||
|
|
||||||
if windows_minutes <= MINUTES_PER_DAY.saturating_add(ROUNDING_BIAS_MINUTES) {
|
if windows_minutes <= MINUTES_PER_DAY.saturating_add(ROUNDING_BIAS_MINUTES) {
|
||||||
let adjusted = windows_minutes.saturating_add(ROUNDING_BIAS_MINUTES);
|
let adjusted = windows_minutes.saturating_add(ROUNDING_BIAS_MINUTES);
|
||||||
|
|||||||
@@ -31,16 +31,16 @@ use super::rate_limits::render_status_limit_progress_bar;
|
|||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
struct StatusContextWindowData {
|
struct StatusContextWindowData {
|
||||||
percent_remaining: u8,
|
percent_remaining: i64,
|
||||||
tokens_in_context: u64,
|
tokens_in_context: i64,
|
||||||
window: u64,
|
window: i64,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub(crate) struct StatusTokenUsageData {
|
pub(crate) struct StatusTokenUsageData {
|
||||||
total: u64,
|
total: i64,
|
||||||
input: u64,
|
input: i64,
|
||||||
output: u64,
|
output: i64,
|
||||||
context_window: Option<StatusContextWindowData>,
|
context_window: Option<StatusContextWindowData>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -103,7 +103,8 @@ pub(crate) fn compose_account_display(config: &Config) -> Option<StatusAccountDi
|
|||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn format_tokens_compact(value: u64) -> String {
|
pub(crate) fn format_tokens_compact(value: i64) -> String {
|
||||||
|
let value = value.max(0);
|
||||||
if value == 0 {
|
if value == 0 {
|
||||||
return "0".to_string();
|
return "0".to_string();
|
||||||
}
|
}
|
||||||
@@ -111,14 +112,15 @@ pub(crate) fn format_tokens_compact(value: u64) -> String {
|
|||||||
return value.to_string();
|
return value.to_string();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let value_f64 = value as f64;
|
||||||
let (scaled, suffix) = if value >= 1_000_000_000_000 {
|
let (scaled, suffix) = if value >= 1_000_000_000_000 {
|
||||||
(value as f64 / 1_000_000_000_000.0, "T")
|
(value_f64 / 1_000_000_000_000.0, "T")
|
||||||
} else if value >= 1_000_000_000 {
|
} else if value >= 1_000_000_000 {
|
||||||
(value as f64 / 1_000_000_000.0, "B")
|
(value_f64 / 1_000_000_000.0, "B")
|
||||||
} else if value >= 1_000_000 {
|
} else if value >= 1_000_000 {
|
||||||
(value as f64 / 1_000_000.0, "M")
|
(value_f64 / 1_000_000.0, "M")
|
||||||
} else {
|
} else {
|
||||||
(value as f64 / 1_000.0, "K")
|
(value_f64 / 1_000.0, "K")
|
||||||
};
|
};
|
||||||
|
|
||||||
let decimals = if scaled < 10.0 {
|
let decimals = if scaled < 10.0 {
|
||||||
|
|||||||
@@ -27,7 +27,7 @@ pub(crate) enum StatusRateLimitData {
|
|||||||
pub(crate) struct RateLimitWindowDisplay {
|
pub(crate) struct RateLimitWindowDisplay {
|
||||||
pub used_percent: f64,
|
pub used_percent: f64,
|
||||||
pub resets_at: Option<String>,
|
pub resets_at: Option<String>,
|
||||||
pub window_minutes: Option<u64>,
|
pub window_minutes: Option<i64>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl RateLimitWindowDisplay {
|
impl RateLimitWindowDisplay {
|
||||||
|
|||||||
Reference in New Issue
Block a user