From 6d68a90064f53486b4488804933f488e1d914b6f Mon Sep 17 00:00:00 2001 From: Luci <22126563+LuciNyan@users.noreply.github.com> Date: Fri, 25 Apr 2025 01:49:18 +0800 Subject: [PATCH] feat: enhance toCodePoints to prevent potential unicode 14 errors (#615) ## Description `Array.from` may fail when handling certain characters newly added in Unicode 14. Where possible, it seems better to use `Intl.Segmenter` for more reliable processing. ![image](https://github.com/user-attachments/assets/2cbd779d-69d3-448e-b76a-d793cb639d96) --- codex-cli/src/text-buffer.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/codex-cli/src/text-buffer.ts b/codex-cli/src/text-buffer.ts index fe4e2a47..150feec9 100644 --- a/codex-cli/src/text-buffer.ts +++ b/codex-cli/src/text-buffer.ts @@ -34,6 +34,10 @@ function clamp(v: number, min: number, max: number): number { * ---------------------------------------------------------------------- */ function toCodePoints(str: string): Array<string> { + if (typeof Intl !== "undefined" && "Segmenter" in Intl) { + const seg = new Intl.Segmenter(); + return [...seg.segment(str)].map((seg) => seg.segment); + } // [...str] or Array.from both iterate by UTF‑32 code point, handling // surrogate pairs correctly. return Array.from(str);