Files
llmx/codex-rs/protocol/src/num_format.rs
Justin Lebar 18330c2362 Format large numbers in a more readable way. (#2046)
- In the bottom line of the TUI, print the number of tokens to 3 sigfigs
  with an SI suffix, e.g. "1.23K".
- Elsewhere where we print a number, I figure it's worthwhile to print
  the exact number, because e.g. it's a summary of your session. Here we print
  the numbers comma-separated.
2025-09-08 21:48:48 +00:00

99 lines
3.5 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
use std::sync::OnceLock;
use icu_decimal::DecimalFormatter;
use icu_decimal::input::Decimal;
use icu_decimal::options::DecimalFormatterOptions;
use icu_locale_core::Locale;
fn make_local_formatter() -> Option<DecimalFormatter> {
let loc: Locale = sys_locale::get_locale()?.parse().ok()?;
DecimalFormatter::try_new(loc.into(), DecimalFormatterOptions::default()).ok()
}
fn make_en_us_formatter() -> DecimalFormatter {
#![allow(clippy::expect_used)]
let loc: Locale = "en-US".parse().expect("en-US wasn't a valid locale");
DecimalFormatter::try_new(loc.into(), DecimalFormatterOptions::default())
.expect("en-US wasn't a valid locale")
}
fn formatter() -> &'static DecimalFormatter {
static FORMATTER: OnceLock<DecimalFormatter> = OnceLock::new();
FORMATTER.get_or_init(|| make_local_formatter().unwrap_or_else(make_en_us_formatter))
}
/// Format a u64 with locale-aware digit separators (e.g. "12345" -> "12,345"
/// for en-US).
pub fn format_with_separators(n: u64) -> String {
formatter().format(&Decimal::from(n)).to_string()
}
fn format_si_suffix_with_formatter(n: u64, formatter: &DecimalFormatter) -> String {
if n < 1000 {
return formatter.format(&Decimal::from(n)).to_string();
}
// Format `n / scale` with the requested number of fractional digits.
let format_scaled = |n: u64, scale: u64, frac_digits: u32| -> String {
let value = n as f64 / scale as f64;
let scaled: u64 = (value * 10f64.powi(frac_digits as i32)).round() as u64;
let mut dec = Decimal::from(scaled);
dec.multiply_pow10(-(frac_digits as i16));
formatter.format(&dec).to_string()
};
const UNITS: [(u64, &str); 3] = [(1_000, "K"), (1_000_000, "M"), (1_000_000_000, "G")];
let f = n as f64;
for &(scale, suffix) in &UNITS {
if (100.0 * f / scale as f64).round() < 1000.0 {
return format!("{}{}", format_scaled(n, scale, 2), suffix);
} else if (10.0 * f / scale as f64).round() < 1000.0 {
return format!("{}{}", format_scaled(n, scale, 1), suffix);
} else if (f / scale as f64).round() < 1000.0 {
return format!("{}{}", format_scaled(n, scale, 0), suffix);
}
}
// Above 1000G, keep wholeG precision.
format!(
"{}G",
format_with_separators(((n as f64) / 1e9).round() as u64)
)
}
/// Format token counts to 3 significant figures, using base-10 SI suffixes.
///
/// Examples (en-US):
/// - 999 -> "999"
/// - 1200 -> "1.20K"
/// - 123456789 -> "123M"
pub fn format_si_suffix(n: u64) -> String {
format_si_suffix_with_formatter(n, formatter())
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn kmg() {
let formatter = make_en_us_formatter();
let fmt = |n: u64| format_si_suffix_with_formatter(n, &formatter);
assert_eq!(fmt(0), "0");
assert_eq!(fmt(999), "999");
assert_eq!(fmt(1_000), "1.00K");
assert_eq!(fmt(1_200), "1.20K");
assert_eq!(fmt(10_000), "10.0K");
assert_eq!(fmt(100_000), "100K");
assert_eq!(fmt(999_500), "1.00M");
assert_eq!(fmt(1_000_000), "1.00M");
assert_eq!(fmt(1_234_000), "1.23M");
assert_eq!(fmt(12_345_678), "12.3M");
assert_eq!(fmt(999_950_000), "1.00G");
assert_eq!(fmt(1_000_000_000), "1.00G");
assert_eq!(fmt(1_234_000_000), "1.23G");
// Above 1000G we keep wholeG precision (no higher unit supported here).
assert_eq!(fmt(1_234_000_000_000), "1,234G");
}
}