From 6d68a90064f53486b4488804933f488e1d914b6f Mon Sep 17 00:00:00 2001
From: Luci <22126563+LuciNyan@users.noreply.github.com>
Date: Fri, 25 Apr 2025 01:49:18 +0800
Subject: [PATCH] feat: enhance toCodePoints to prevent potential Unicode 14
 errors (#615)

## Description

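`Array.from` splits a string by code point, so it can break apart
characters made of several code points, including some sequences newly
added in Unicode 14. Where `Intl.Segmenter` is available, use it instead
to split the string into grapheme clusters more reliably.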


![image](https://github.com/user-attachments/assets/2cbd779d-69d3-448e-b76a-d793cb639d96)
---
 codex-cli/src/text-buffer.ts | 4 ++++
 1 file changed, 4 insertions(+)
diff --git a/codex-cli/src/text-buffer.ts b/codex-cli/src/text-buffer.ts
index fe4e2a47..150feec9 100644
--- a/codex-cli/src/text-buffer.ts
+++ b/codex-cli/src/text-buffer.ts
@@ -34,6 +34,10 @@ function clamp(v: number, min: number, max: number): number {
  * ---------------------------------------------------------------------- */
 
 function toCodePoints(str: string): Array<string> {
+  if (typeof Intl !== "undefined" && "Segmenter" in Intl) {
+    const seg = new Intl.Segmenter();
+    return [...seg.segment(str)].map((seg) => seg.segment);
+  }
   // [...str] or Array.from both iterate by UTF‑32 code point, handling
   // surrogate pairs correctly.
   return Array.from(str);