feat: use actual tokenizer for unified_exec truncation (#5514)

This commit is contained in:
jif-oai
2025-10-23 17:08:06 +01:00
committed by GitHub
parent 6745b12427
commit 0b4527146e
4 changed files with 68 additions and 41 deletions

View File

@@ -55,8 +55,13 @@ impl Tokenizer {
Ok(Self { inner })
}
/// Construct a tokenizer with the default encoding, `O200kBase`.
///
/// Equivalent to `Self::new(EncodingKind::O200kBase)`.
///
/// # Errors
/// Propagates any `TokenizerError` from `Self::new` — presumably a
/// failure to load the `o200k_base` encoding tables (TODO: confirm
/// against `Tokenizer::new`, whose body is outside this hunk).
pub fn try_default() -> Result<Self, TokenizerError> {
Self::new(EncodingKind::O200kBase)
}
/// Build a tokenizer using an `OpenAI` model name (maps to an encoding).
/// Falls back to the `o200k_base` encoding when the model is unknown.
/// Falls back to the `O200kBase` encoding when the model is unknown.
pub fn for_model(model: &str) -> Result<Self, TokenizerError> {
match tiktoken_rs::get_bpe_from_model(model) {
Ok(inner) => Ok(Self { inner }),