Re-add markdown streaming (#2029)
Wait for newlines, then render markdown on a line by line basis. Word wrap it for the current terminal size and then spit it out line by line into the UI. Also adds tests and fixes some UI regressions.
This commit is contained in:
24109
codex-rs/tui/tests/fixtures/binary-size-log.jsonl
vendored
Normal file
24109
codex-rs/tui/tests/fixtures/binary-size-log.jsonl
vendored
Normal file
File diff suppressed because one or more lines are too long
298
codex-rs/tui/tests/fixtures/ideal-binary-response.txt
vendored
Normal file
298
codex-rs/tui/tests/fixtures/ideal-binary-response.txt
vendored
Normal file
@@ -0,0 +1,298 @@
|
||||
thinking
|
||||
Investigating binary sizes
|
||||
|
||||
I need to check the codex-rs repository to explain why the project's binaries
|
||||
are large. The user is likely seeking specifics about the setup: are Rust builds
|
||||
static, what features are enabled, and is debug information included? It could
|
||||
be due to static linking, included OpenSSL, or how panic handling is set up. I
|
||||
should look into the Cargo.toml file to confirm features and profiles without
|
||||
needing to edit any code. Let's get started on this!
|
||||
|
||||
codex
|
||||
I’m going to scan the workspace and Cargo manifests to see build profiles and
|
||||
dependencies that impact binary size. Then I’ll summarize the main causes.
|
||||
|
||||
⚡ Running ls -la
|
||||
⎿ total 6696
|
||||
drwxr-xr-x@ 39 easong staff 1248 Aug 9 08:49 .
|
||||
drwxr-xr-x@ 29 easong staff 928 Aug 9 08:50 ..
|
||||
-rw-r--r--@ 1 easong staff 10244 Aug 8 21:13 .DS_Store
|
||||
-rw-r--r--@ 1 easong staff 212 Aug 4 18:49 .gitignore
|
||||
… +30 lines
|
||||
-rw-r--r--@ 1 easong staff 9971 Aug 8 23:07 simpler.md
|
||||
drwxr-xr-x@ 6 easong staff 192 Aug 4 22:40 target
|
||||
drwxr-xr-x@ 6 easong staff 192 Aug 7 19:20 tui
|
||||
-rw-r--r--@ 1 easong staff 8639 Aug 6 23:30 typewriter.md
|
||||
-rw-r--r--@ 1 easong staff 7673 Aug 6 21:58 wrap-plan.md
|
||||
⚡ Running sed -n '1,200p' Cargo.toml
|
||||
⎿ [workspace]
|
||||
members = [
|
||||
"ansi-escape",
|
||||
"apply-patch",
|
||||
"arg0",
|
||||
… +38 lines
|
||||
codegen-units = 1
|
||||
|
||||
[patch.crates-io]
|
||||
# ratatui = { path = "../../ratatui" }
|
||||
ratatui = { git = "https://github.com/nornagon/ratatui", branch =
|
||||
"nornagon-v0.29.0-patch" }
|
||||
Optimizing Rust Release Profile
|
||||
|
||||
I'm reviewing the workspace's release profile, which has settings like lto=fat,
|
||||
strip=symbols, and codegen-units=1 to reduce binary size. However, it seems that
|
||||
"strip = symbols" only removes symbols, not debug information. In Rust 1.73+,
|
||||
both "symbols" and "debuginfo" stripping options are available. I should check
|
||||
whether using these results in a smaller binary. I’ll also skim through crates
|
||||
for heavy dependencies in Cargo.toml, including cli, core, exec, linux-sandbox,
|
||||
tui, login, ollama, and mcp.
|
||||
|
||||
⚡ Running for d in ansi-escape apply-patch arg0 cli common core exec execpolicy
|
||||
file-search linux-sandbox login mcp-client mcp-server mcp-types ollama tui; do
|
||||
echo "--- $d/Cargo.toml"; sed -n '1,200p' $d/Cargo.toml; echo; done
|
||||
⎿ --- ansi-escape/Cargo.toml
|
||||
[package]
|
||||
edition = "2024"
|
||||
name = "codex-ansi-escape"
|
||||
version = { workspace = true }
|
||||
… +237 lines
|
||||
"process",
|
||||
"rt-multi-thread",
|
||||
"signal",
|
||||
] }
|
||||
tracing = { version
|
||||
⚡ Running sed -n '1,200p' execpolicy/Cargo.toml
|
||||
⎿ [package]
|
||||
name = "codex-execpolicy"
|
||||
version = { workspace = true }
|
||||
edition = "2024"
|
||||
|
||||
… +23 lines
|
||||
serde_json = "1.0.142"
|
||||
serde_with = { version = "3", features = ["macros"] }
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = "3.13.0"
|
||||
⚡ Running sed -n '1,200p' file-search/Cargo.toml
|
||||
⎿ [package]
|
||||
edition = "2024"
|
||||
name = "codex-file-search"
|
||||
version = { workspace = true }
|
||||
|
||||
… +11 lines
|
||||
ignore = "0.4.23"
|
||||
nucleo-matcher = "0.3.1"
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json = "1.0.142"
|
||||
tokio = { version = "1", features = ["full"] }
|
||||
⚡ Running sed -n '1,200p' linux-sandbox/Cargo.toml
|
||||
⎿ [package]
|
||||
edition = "2024"
|
||||
name = "codex-linux-sandbox"
|
||||
version = { workspace = true }
|
||||
|
||||
… +24 lines
|
||||
"macros",
|
||||
"process",
|
||||
"rt-multi-thread",
|
||||
"signal",
|
||||
] }
|
||||
⚡ Running sed -n '1,200p' login/Cargo.toml
|
||||
⎿ [package]
|
||||
edition = "2024"
|
||||
name = "codex-login"
|
||||
version = { workspace = true }
|
||||
|
||||
… +17 lines
|
||||
] }
|
||||
|
||||
[dev-dependencies]
|
||||
pretty_assertions = "1.4.1"
|
||||
tempfile = "3"
|
||||
⚡ Running sed -n '1,200p' mcp-client/Cargo.toml
|
||||
⎿ [package]
|
||||
name = "codex-mcp-client"
|
||||
version = { workspace = true }
|
||||
edition = "2024"
|
||||
|
||||
… +13 lines
|
||||
"process",
|
||||
"rt-multi-thread",
|
||||
"sync",
|
||||
"time",
|
||||
] }
|
||||
⚡ Running sed -n '1,200p' mcp-server/Cargo.toml
|
||||
⎿ [package]
|
||||
edition = "2024"
|
||||
name = "codex-mcp-server"
|
||||
version = { workspace = true }
|
||||
|
||||
… +35 lines
|
||||
mcp_test_support = { path = "tests/common" }
|
||||
pretty_assertions = "1.4.1"
|
||||
tempfile = "3"
|
||||
tokio-test = "0.4"
|
||||
wiremock = "0.6"
|
||||
⚡ Running sed -n '1,200p' mcp-types/Cargo.toml
|
||||
⎿ [package]
|
||||
edition = "2024"
|
||||
name = "mcp-types"
|
||||
version = { workspace = true }
|
||||
|
||||
… +1 lines
|
||||
workspace = true
|
||||
|
||||
[dependencies]
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
⚡ Running sed -n '1,200p' ollama/Cargo.toml
|
||||
⎿ [package]
|
||||
edition = "2024"
|
||||
name = "codex-ollama"
|
||||
version = { workspace = true }
|
||||
|
||||
… +22 lines
|
||||
tracing = { version = "0.1.41", features = ["log"] }
|
||||
wiremock = "0.6"
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = "3"
|
||||
⚡ Running sed -n '1,200p' tui/Cargo.toml
|
||||
⎿ [package]
|
||||
edition = "2024"
|
||||
name = "codex-tui"
|
||||
version = { workspace = true }
|
||||
|
||||
… +74 lines
|
||||
chrono = { version = "0.4", features = ["serde"] }
|
||||
insta = "1.43.1"
|
||||
pretty_assertions = "1"
|
||||
rand = "0.8"
|
||||
vt100 = "0.16.2"
|
||||
⚡ Running sed -n '1,200p' chatgpt/Cargo.toml
|
||||
⎿ [package]
|
||||
edition = "2024"
|
||||
name = "codex-chatgpt"
|
||||
version = { workspace = true }
|
||||
|
||||
… +11 lines
|
||||
serde_json = "1"
|
||||
tokio = { version = "1", features = ["full"] }
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = "3"
|
||||
thinking
|
||||
Considering binary size impacts
|
||||
|
||||
I’m evaluating how binary size is influenced by various factors. There are many
|
||||
binaries, such as codex and codex-exec, which include dependencies by default,
|
||||
pulling in features like Tokio and hyper. For instance, reqwest includes
|
||||
default features like TLS through rustls. If default features remain enabled,
|
||||
it inflates the size with many additional components. I notice that for musl
|
||||
targets, using vendored builds of OpenSSL could significantly increase binary
|
||||
size. This is quite the intricate web of dependencies!
|
||||
|
||||
Analyzing musl and binary size
|
||||
|
||||
I’m considering the implications of building for musl with static linking.
|
||||
In this case, OpenSSL might come into play through some crates, but reqwest
|
||||
defaults to using rustls, although openssl-sys vendored could increase binary
|
||||
size. It’s interesting to think about whether they’re targeting musl, especially
|
||||
since it seems they might often be building for Mac and Linux with dynamic
|
||||
linking instead.
|
||||
|
||||
I also realize there’s a difference between debug and release builds — release
|
||||
should be smaller due to optimizations, but if they’re defaulting to debug
|
||||
builds, that could lead to much larger binaries.
|
||||
|
||||
Assessing crate impacts on size
|
||||
|
||||
I need to be precise when analyzing binary sizes from various crates. Some
|
||||
crates, like ratatui-image, include heavy features such as libjpeg decoders,
|
||||
which can really bulk things up. Then there are the tree-sitter crates that
|
||||
contain large static grammar tables, adding significant size. The starlark crate
|
||||
and tracing-subscriber also contribute to size increases.
|
||||
|
||||
With static linking per binary being standard in Rust, it leads to multiple
|
||||
copies of the same dependencies unless cdylibs are used. That makes the compile
|
||||
times longer, but using LTO helps reduce bloat somewhat through deduplication.
|
||||
|
||||
Evaluating binary size factors
|
||||
|
||||
Having multiple binaries indeed increases the overall size significantly.
|
||||
I'm considering options like upgrading to "opt-level = 'z'" or using
|
||||
"strip=debuginfo" for reducing size. It's interesting that while the profile
|
||||
only sets symbols for stripping, not debuginfo, this could inflate sizes if
|
||||
retained in release builds.
|
||||
|
||||
The distinction between "symbols" and "debuginfo" is critical; "symbols" keeps
|
||||
debug information which may not align with the goal of minimizing size. I’d like
|
||||
to verify with the Rust documentation, but my network access is restricted.
|
||||
|
||||
Explaining large binary sizes
|
||||
|
||||
I’m analyzing why binaries in this project are so large. The primary reasons
|
||||
include monolithic static linking, where each binary includes the full
|
||||
dependency graph without shared libraries. Heavy dependencies such as reqwest,
|
||||
hyper, and TLS components also contribute significantly to size, along with
|
||||
image decoders and other libraries.
|
||||
|
||||
Using MUSL targets means vendored OpenSSL can drastically increase the size due
|
||||
to static linking. Furthermore, if the release profile keeps debug information
|
||||
due to using "strip = symbols," that can lead to larger binaries as well. It’s a
|
||||
mix of several factors, including panic strategies and platform-specific linking
|
||||
behavior.
|
||||
|
||||
Identifying binary size contributors
|
||||
|
||||
I’m considering why binaries are still large even without static linking. The
|
||||
tracing-subscriber crate potentially adds weight because it includes regex,
|
||||
though it can sometimes use a lighter version. References from files like "core/
|
||||
Cargo.toml" show dependencies like reqwest and tokio, while the tui uses the
|
||||
image crate.
|
||||
|
||||
To present this clearly, I’ll format the final answer with two main sections:
|
||||
"Main Causes" and "Build-Mode Notes." I can also include brief suggestions for
|
||||
reducing size, but I want to stay focused on answering the user's question.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
codex
|
||||
Here’s what’s driving size in this workspace’s binaries.
|
||||
|
||||
Main Causes
|
||||
|
||||
- Static linking style: Each bin (codex, codex-tui, codex-exec,
|
||||
codex-mcp-server, etc.) statically links its full dependency graph, so common
|
||||
code isn’t shared at runtime across executables.
|
||||
- Heavy deps (HTTP/TLS): reqwest brings in Hyper, HTTP/2, compressors, and a TLS
|
||||
stack (rustls by default; OpenSSL on musl). In core, login, tui, and ollama you
|
||||
enable reqwest with json/stream, which still pulls a large transitive set.
|
||||
- Image/terminal stack: codex-tui includes image (with jpeg), ratatui,
|
||||
crossterm, and ratatui-image, which together contribute significant code size
|
||||
(decoders, pixel ops, terminal backends).
|
||||
- Parsers/VMs: tree-sitter + tree-sitter-bash (in core and apply-patch) and
|
||||
starlark (in execpolicy) include sizeable parser/VM tables and runtimes.
|
||||
- Tokio runtime: Broad tokio features (rt-multi-thread, macros, process, signal)
|
||||
across many crates inflate code size even if only subsets are used per bin.
|
||||
- Panic + backtraces: Default panic = unwind and backtrace support keep
|
||||
unwinding tables and symbols that add weight.
|
||||
- Per-target OpenSSL (musl): For *-unknown-linux-musl, core enables openssl-sys
|
||||
with vendored, compiling OpenSSL into the binary—this adds multiple megabytes
|
||||
per executable.
|
||||
|
||||
Build-Mode Notes
|
||||
|
||||
- Release settings: You use lto = "fat" and codegen-units = 1 (good for size),
|
||||
but strip = "symbols" keeps debuginfo. Debuginfo is often the largest single
|
||||
contributor; if you build in release with that setting, binaries can still be
|
||||
large.
|
||||
- Debug builds: cargo build (dev profile) includes full debuginfo, no LTO, and
|
||||
assertions—outputs are much larger than cargo build --release.
|
||||
|
||||
If you want, I can outline targeted trims (e.g., strip = "debuginfo", opt-level
|
||||
= "z", panic abort, tighter tokio/reqwest features) and estimate impact per
|
||||
binary.
|
||||
8041
codex-rs/tui/tests/fixtures/oss-story.jsonl
vendored
Normal file
8041
codex-rs/tui/tests/fixtures/oss-story.jsonl
vendored
Normal file
File diff suppressed because one or more lines are too long
@@ -75,7 +75,7 @@ impl TestScenario {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn hist_001_basic_insertion_no_wrap() {
|
||||
fn basic_insertion_no_wrap() {
|
||||
// Screen of 20x6; viewport is the last row (height=1 at y=5)
|
||||
let area = Rect::new(0, 5, 20, 1);
|
||||
let mut scenario = TestScenario::new(20, 6, area);
|
||||
@@ -97,7 +97,7 @@ fn hist_001_basic_insertion_no_wrap() {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn hist_002_long_token_wraps() {
|
||||
fn long_token_wraps() {
|
||||
let area = Rect::new(0, 5, 20, 1);
|
||||
let mut scenario = TestScenario::new(20, 6, area);
|
||||
|
||||
@@ -130,7 +130,7 @@ fn hist_002_long_token_wraps() {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn hist_003_emoji_and_cjk() {
|
||||
fn emoji_and_cjk() {
|
||||
let area = Rect::new(0, 5, 20, 1);
|
||||
let mut scenario = TestScenario::new(20, 6, area);
|
||||
|
||||
@@ -148,7 +148,7 @@ fn hist_003_emoji_and_cjk() {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn hist_004_mixed_ansi_spans() {
|
||||
fn mixed_ansi_spans() {
|
||||
let area = Rect::new(0, 5, 20, 1);
|
||||
let mut scenario = TestScenario::new(20, 6, area);
|
||||
|
||||
@@ -162,7 +162,7 @@ fn hist_004_mixed_ansi_spans() {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn hist_006_cursor_restoration() {
|
||||
fn cursor_restoration() {
|
||||
let area = Rect::new(0, 5, 20, 1);
|
||||
let mut scenario = TestScenario::new(20, 6, area);
|
||||
|
||||
@@ -182,7 +182,39 @@ fn hist_006_cursor_restoration() {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn hist_005_pre_scroll_region_down() {
|
||||
fn word_wrap_no_mid_word_split() {
|
||||
// Screen of 40x10; viewport is the last row
|
||||
let area = Rect::new(0, 9, 40, 1);
|
||||
let mut scenario = TestScenario::new(40, 10, area);
|
||||
|
||||
let sample = "Years passed, and Willowmere thrived in peace and friendship. Mira’s herb garden flourished with both ordinary and enchanted plants, and travelers spoke of the kindness of the woman who tended them.";
|
||||
let buf = scenario.run_insert(vec![Line::from(sample)]);
|
||||
let rows = scenario.screen_rows_from_bytes(&buf);
|
||||
let joined = rows.join("\n");
|
||||
assert!(
|
||||
!joined.contains("bo\nth"),
|
||||
"word 'both' should not be split across lines:\n{joined}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn em_dash_and_space_word_wrap() {
|
||||
// Repro from report: ensure we break before "inside", not mid-word.
|
||||
let area = Rect::new(0, 9, 40, 1);
|
||||
let mut scenario = TestScenario::new(40, 10, area);
|
||||
|
||||
let sample = "Mara found an old key on the shore. Curious, she opened a tarnished box half-buried in sand—and inside lay a single, glowing seed.";
|
||||
let buf = scenario.run_insert(vec![Line::from(sample)]);
|
||||
let rows = scenario.screen_rows_from_bytes(&buf);
|
||||
let joined = rows.join("\n");
|
||||
assert!(
|
||||
!joined.contains("insi\nde"),
|
||||
"word 'inside' should not be split across lines:\n{joined}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn pre_scroll_region_down() {
|
||||
// Viewport not at bottom: y=3 (0-based), height=1
|
||||
let area = Rect::new(0, 3, 20, 1);
|
||||
let mut scenario = TestScenario::new(20, 6, area);
|
||||
|
||||
77
codex-rs/tui/tests/vt100_streaming_no_dup.rs
Normal file
77
codex-rs/tui/tests/vt100_streaming_no_dup.rs
Normal file
@@ -0,0 +1,77 @@
|
||||
#![cfg(feature = "vt100-tests")]
|
||||
|
||||
use ratatui::backend::TestBackend;
|
||||
use ratatui::layout::Rect;
|
||||
use ratatui::text::Line;
|
||||
|
||||
fn term(viewport: Rect) -> codex_tui::custom_terminal::Terminal<TestBackend> {
|
||||
let backend = TestBackend::new(20, 6);
|
||||
let mut term = codex_tui::custom_terminal::Terminal::with_options(backend)
|
||||
.unwrap_or_else(|e| panic!("failed to construct terminal: {e}"));
|
||||
term.set_viewport_area(viewport);
|
||||
term
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn stream_commit_trickle_no_duplication() {
|
||||
// Viewport is the last row (height=1 at y=5)
|
||||
let area = Rect::new(0, 5, 20, 1);
|
||||
let mut t = term(area);
|
||||
|
||||
// Step 1: commit first row
|
||||
let mut out1 = Vec::new();
|
||||
codex_tui::insert_history::insert_history_lines_to_writer(
|
||||
&mut t,
|
||||
&mut out1,
|
||||
vec![Line::from("one")],
|
||||
);
|
||||
|
||||
// Step 2: later commit next row
|
||||
let mut out2 = Vec::new();
|
||||
codex_tui::insert_history::insert_history_lines_to_writer(
|
||||
&mut t,
|
||||
&mut out2,
|
||||
vec![Line::from("two")],
|
||||
);
|
||||
|
||||
let combined = [out1, out2].concat();
|
||||
let s = String::from_utf8_lossy(&combined);
|
||||
assert_eq!(
|
||||
s.matches("one").count(),
|
||||
1,
|
||||
"history line duplicated: {s:?}"
|
||||
);
|
||||
assert_eq!(
|
||||
s.matches("two").count(),
|
||||
1,
|
||||
"history line duplicated: {s:?}"
|
||||
);
|
||||
assert!(
|
||||
!s.contains("three"),
|
||||
"live-only content leaked into history: {s:?}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn live_ring_rows_not_inserted_into_history() {
|
||||
let area = Rect::new(0, 5, 20, 1);
|
||||
let mut t = term(area);
|
||||
|
||||
// Commit two rows to history.
|
||||
let mut buf = Vec::new();
|
||||
codex_tui::insert_history::insert_history_lines_to_writer(
|
||||
&mut t,
|
||||
&mut buf,
|
||||
vec![Line::from("one"), Line::from("two")],
|
||||
);
|
||||
|
||||
// The live ring might display tail+head rows like ["two", "three"],
|
||||
// but only committed rows should be present in the history ANSI stream.
|
||||
let s = String::from_utf8_lossy(&buf);
|
||||
assert!(s.contains("one"));
|
||||
assert!(s.contains("two"));
|
||||
assert!(
|
||||
!s.contains("three"),
|
||||
"uncommitted live-ring content should not be inserted into history: {s:?}"
|
||||
);
|
||||
}
|
||||
Reference in New Issue
Block a user