Format large numbers in a more readable way. (#2046)

- In the bottom line of the TUI, print the number of tokens to 3 sigfigs
  with an SI suffix, e.g. "1.23K".
- Elsewhere where we print a number, I figure it's worthwhile to print
  the exact number, because e.g. it's a summary of your session. Here we print
  the numbers comma-separated.
This commit is contained in:
Justin Lebar
2025-09-08 14:48:48 -07:00
committed by GitHub
parent 4c46490e53
commit 18330c2362
8 changed files with 196 additions and 11 deletions

69
codex-rs/Cargo.lock generated
View File

@@ -917,6 +917,8 @@ name = "codex-protocol"
version = "0.0.0"
dependencies = [
"base64 0.22.1",
"icu_decimal",
"icu_locale_core",
"mcp-types",
"mime_guess",
"pretty_assertions",
@@ -926,6 +928,7 @@ dependencies = [
"serde_with",
"strum 0.27.2",
"strum_macros 0.27.2",
"sys-locale",
"tracing",
"ts-rs",
"uuid",
@@ -1758,6 +1761,17 @@ dependencies = [
"winapi",
]
[[package]]
name = "fixed_decimal"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "35943d22b2f19c0cb198ecf915910a8158e94541c89dcc63300d7799d46c2c5e"
dependencies = [
"displaydoc",
"smallvec",
"writeable",
]
[[package]]
name = "fixedbitset"
version = "0.4.2"
@@ -2257,6 +2271,45 @@ dependencies = [
"zerovec",
]
[[package]]
name = "icu_decimal"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fec61c43fdc4e368a9f450272833123a8ef0d7083a44597660ce94d791b8a2e2"
dependencies = [
"displaydoc",
"fixed_decimal",
"icu_decimal_data",
"icu_locale",
"icu_locale_core",
"icu_provider",
"tinystr",
"writeable",
"zerovec",
]
[[package]]
name = "icu_decimal_data"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b70963bc35f9bdf1bc66a5c1f458f4991c1dc71760e00fa06016b2c76b2738d5"
[[package]]
name = "icu_locale"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ae5921528335e91da1b6c695dbf1ec37df5ac13faa3f91e5640be93aa2fbefd"
dependencies = [
"displaydoc",
"icu_collections",
"icu_locale_core",
"icu_locale_data",
"icu_provider",
"potential_utf",
"tinystr",
"zerovec",
]
[[package]]
name = "icu_locale_core"
version = "2.0.0"
@@ -2270,6 +2323,12 @@ dependencies = [
"zerovec",
]
[[package]]
name = "icu_locale_data"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fdef0c124749d06a743c69e938350816554eb63ac979166590e2b4ee4252765"
[[package]]
name = "icu_normalizer"
version = "2.0.0"
@@ -3514,6 +3573,7 @@ version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5a7c30837279ca13e7c867e9e40053bc68740f988cb07f7ca6df43cc734b585"
dependencies = [
"serde",
"zerovec",
]
@@ -4850,6 +4910,15 @@ dependencies = [
"yaml-rust",
]
[[package]]
name = "sys-locale"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8eab9a99a024a169fe8a903cf9d4a3b3601109bcc13bd9e3c6fff259138626c4"
dependencies = [
"libc",
]
[[package]]
name = "system-configuration"
version = "0.6.1"

View File

@@ -26,6 +26,7 @@ use codex_core::protocol::TurnAbortReason;
use codex_core::protocol::TurnDiffEvent;
use codex_core::protocol::WebSearchBeginEvent;
use codex_core::protocol::WebSearchEndEvent;
use codex_protocol::num_format::format_with_separators;
use owo_colors::OwoColorize;
use owo_colors::Style;
use shlex::try_join;
@@ -194,7 +195,7 @@ impl EventProcessor for EventProcessorWithHumanOutput {
ts_println!(
self,
"tokens used: {}",
usage_info.total_token_usage.blended_total()
format_with_separators(usage_info.total_token_usage.blended_total())
);
}
}

View File

@@ -12,6 +12,8 @@ workspace = true
[dependencies]
base64 = "0.22.1"
icu_decimal = "2.0.0"
icu_locale_core = "2.0.0"
mcp-types = { path = "../mcp-types" }
mime_guess = "2.0.5"
serde = { version = "1", features = ["derive"] }
@@ -20,6 +22,7 @@ serde_json = "1"
serde_with = { version = "3.14.0", features = ["macros", "base64"] }
strum = "0.27.2"
strum_macros = "0.27.2"
sys-locale = "0.3.2"
tracing = "0.1.41"
ts-rs = { version = "11", features = ["uuid-impl", "serde-json-impl"] }
uuid = { version = "1", features = ["serde", "v4"] }

View File

@@ -3,6 +3,7 @@ pub mod custom_prompts;
pub mod mcp_protocol;
pub mod message_history;
pub mod models;
pub mod num_format;
pub mod parse_command;
pub mod plan_tool;
pub mod protocol;

View File

@@ -0,0 +1,98 @@
use std::sync::OnceLock;
use icu_decimal::DecimalFormatter;
use icu_decimal::input::Decimal;
use icu_decimal::options::DecimalFormatterOptions;
use icu_locale_core::Locale;
fn make_local_formatter() -> Option<DecimalFormatter> {
let loc: Locale = sys_locale::get_locale()?.parse().ok()?;
DecimalFormatter::try_new(loc.into(), DecimalFormatterOptions::default()).ok()
}
fn make_en_us_formatter() -> DecimalFormatter {
#![allow(clippy::expect_used)]
let loc: Locale = "en-US".parse().expect("en-US wasn't a valid locale");
DecimalFormatter::try_new(loc.into(), DecimalFormatterOptions::default())
.expect("en-US wasn't a valid locale")
}
fn formatter() -> &'static DecimalFormatter {
static FORMATTER: OnceLock<DecimalFormatter> = OnceLock::new();
FORMATTER.get_or_init(|| make_local_formatter().unwrap_or_else(make_en_us_formatter))
}
/// Format a u64 with locale-aware digit separators (e.g. "12345" -> "12,345"
/// for en-US).
pub fn format_with_separators(n: u64) -> String {
formatter().format(&Decimal::from(n)).to_string()
}
fn format_si_suffix_with_formatter(n: u64, formatter: &DecimalFormatter) -> String {
if n < 1000 {
return formatter.format(&Decimal::from(n)).to_string();
}
// Format `n / scale` with the requested number of fractional digits.
let format_scaled = |n: u64, scale: u64, frac_digits: u32| -> String {
let value = n as f64 / scale as f64;
let scaled: u64 = (value * 10f64.powi(frac_digits as i32)).round() as u64;
let mut dec = Decimal::from(scaled);
dec.multiply_pow10(-(frac_digits as i16));
formatter.format(&dec).to_string()
};
const UNITS: [(u64, &str); 3] = [(1_000, "K"), (1_000_000, "M"), (1_000_000_000, "G")];
let f = n as f64;
for &(scale, suffix) in &UNITS {
if (100.0 * f / scale as f64).round() < 1000.0 {
return format!("{}{}", format_scaled(n, scale, 2), suffix);
} else if (10.0 * f / scale as f64).round() < 1000.0 {
return format!("{}{}", format_scaled(n, scale, 1), suffix);
} else if (f / scale as f64).round() < 1000.0 {
return format!("{}{}", format_scaled(n, scale, 0), suffix);
}
}
// Above 1000G, keep wholeG precision.
format!(
"{}G",
format_with_separators(((n as f64) / 1e9).round() as u64)
)
}
/// Format token counts to 3 significant figures, using base-10 SI suffixes.
///
/// Examples (en-US):
/// - 999 -> "999"
/// - 1200 -> "1.20K"
/// - 123456789 -> "123M"
pub fn format_si_suffix(n: u64) -> String {
format_si_suffix_with_formatter(n, formatter())
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn kmg() {
let formatter = make_en_us_formatter();
let fmt = |n: u64| format_si_suffix_with_formatter(n, &formatter);
assert_eq!(fmt(0), "0");
assert_eq!(fmt(999), "999");
assert_eq!(fmt(1_000), "1.00K");
assert_eq!(fmt(1_200), "1.20K");
assert_eq!(fmt(10_000), "10.0K");
assert_eq!(fmt(100_000), "100K");
assert_eq!(fmt(999_500), "1.00M");
assert_eq!(fmt(1_000_000), "1.00M");
assert_eq!(fmt(1_234_000), "1.23M");
assert_eq!(fmt(12_345_678), "12.3M");
assert_eq!(fmt(999_950_000), "1.00G");
assert_eq!(fmt(1_000_000_000), "1.00G");
assert_eq!(fmt(1_234_000_000), "1.23G");
// Above 1000G we keep wholeG precision (no higher unit supported here).
assert_eq!(fmt(1_234_000_000_000), "1,234G");
}
}

View File

@@ -16,6 +16,7 @@ use crate::custom_prompts::CustomPrompt;
use crate::mcp_protocol::ConversationId;
use crate::message_history::HistoryEntry;
use crate::models::ResponseItem;
use crate::num_format::format_with_separators;
use crate::parse_command::ParsedCommand;
use crate::plan_tool::UpdatePlanArgs;
use mcp_types::CallToolResult;
@@ -645,19 +646,26 @@ impl From<TokenUsage> for FinalOutput {
impl fmt::Display for FinalOutput {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let token_usage = &self.token_usage;
write!(
f,
"Token usage: total={} input={}{} output={}{}",
token_usage.blended_total(),
token_usage.non_cached_input(),
format_with_separators(token_usage.blended_total()),
format_with_separators(token_usage.non_cached_input()),
if token_usage.cached_input() > 0 {
format!(" (+ {} cached)", token_usage.cached_input())
format!(
" (+ {} cached)",
format_with_separators(token_usage.cached_input())
)
} else {
String::new()
},
token_usage.output_tokens,
format_with_separators(token_usage.output_tokens),
if token_usage.reasoning_output_tokens > 0 {
format!(" (reasoning {})", token_usage.reasoning_output_tokens)
format!(
" (reasoning {})",
format_with_separators(token_usage.reasoning_output_tokens)
)
} else {
String::new()
}

View File

@@ -1,4 +1,5 @@
use codex_core::protocol::TokenUsageInfo;
use codex_protocol::num_format::format_si_suffix;
use crossterm::event::KeyCode;
use crossterm::event::KeyEvent;
use crossterm::event::KeyEventKind;
@@ -1276,8 +1277,11 @@ impl WidgetRef for ChatComposer {
let token_usage = &token_usage_info.total_token_usage;
hint.push(" ".into());
hint.push(
Span::from(format!("{} tokens used", token_usage.blended_total()))
.style(Style::default().add_modifier(Modifier::DIM)),
Span::from(format!(
"{} tokens used",
format_si_suffix(token_usage.blended_total())
))
.style(Style::default().add_modifier(Modifier::DIM)),
);
let last_token_usage = &token_usage_info.last_token_usage;
if let Some(context_window) = token_usage_info.model_context_window {

View File

@@ -28,6 +28,7 @@ use codex_core::protocol::SandboxPolicy;
use codex_core::protocol::SessionConfiguredEvent;
use codex_core::protocol::TokenUsage;
use codex_protocol::mcp_protocol::ConversationId;
use codex_protocol::num_format::format_with_separators;
use codex_protocol::parse_command::ParsedCommand;
use image::DynamicImage;
use image::ImageReader;
@@ -964,7 +965,7 @@ pub(crate) fn new_status_output(
// Input: <input> [+ <cached> cached]
let mut input_line_spans: Vec<Span<'static>> = vec![
" • Input: ".into(),
usage.non_cached_input().to_string().into(),
format_with_separators(usage.non_cached_input()).into(),
];
if usage.cached_input_tokens > 0 {
let cached = usage.cached_input_tokens;
@@ -974,12 +975,12 @@ pub(crate) fn new_status_output(
// Output: <output>
lines.push(Line::from(vec![
" • Output: ".into(),
usage.output_tokens.to_string().into(),
format_with_separators(usage.output_tokens).into(),
]));
// Total: <total>
lines.push(Line::from(vec![
" • Total: ".into(),
usage.blended_total().to_string().into(),
format_with_separators(usage.blended_total()).into(),
]));
PlainHistoryCell { lines }