feat: local tokenizer (#5508)

This commit is contained in:
jif-oai
2025-10-22 16:01:02 +01:00
committed by GitHub
parent 00b1e130b3
commit fd0673e457
4 changed files with 218 additions and 3 deletions

46
codex-rs/Cargo.lock generated
View File

@@ -1517,6 +1517,16 @@ dependencies = [
name = "codex-utils-string"
version = "0.0.0"
[[package]]
name = "codex-utils-tokenizer"
version = "0.0.0"
dependencies = [
"anyhow",
"pretty_assertions",
"thiserror 2.0.16",
"tiktoken-rs",
]
[[package]]
name = "color-eyre"
version = "0.6.5"
@@ -2314,6 +2324,17 @@ dependencies = [
"once_cell",
]
[[package]]
name = "fancy-regex"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2"
dependencies = [
"bit-set",
"regex-automata",
"regex-syntax 0.8.5",
]
[[package]]
name = "fastrand"
version = "2.3.0"
@@ -4631,7 +4652,7 @@ dependencies = [
"pin-project-lite",
"quinn-proto",
"quinn-udp",
"rustc-hash",
"rustc-hash 2.1.1",
"rustls",
"socket2 0.6.0",
"thiserror 2.0.16",
@@ -4651,7 +4672,7 @@ dependencies = [
"lru-slab",
"rand 0.9.2",
"ring",
"rustc-hash",
"rustc-hash 2.1.1",
"rustls",
"rustls-pki-types",
"slab",
@@ -4996,6 +5017,12 @@ version = "0.1.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "989e6739f80c4ad5b13e0fd7fe89531180375b18520cc8c82080e4dc4035b84f"
[[package]]
name = "rustc-hash"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "rustc-hash"
version = "2.1.1"
@@ -6176,6 +6203,21 @@ dependencies = [
"zune-jpeg",
]
[[package]]
name = "tiktoken-rs"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25563eeba904d770acf527e8b370fe9a5547bacd20ff84a0b6c3bc41288e5625"
dependencies = [
"anyhow",
"base64",
"bstr",
"fancy-regex",
"lazy_static",
"regex",
"rustc-hash 1.1.0",
]
[[package]]
name = "time"
version = "0.3.44"