From 488a40211abd382cf5b7b9969886b02076189a88 Mon Sep 17 00:00:00 2001 From: Jeremy Rose <172423086+nornagon-openai@users.noreply.github.com> Date: Wed, 27 Aug 2025 13:55:59 -0700 Subject: [PATCH] fix (most) doubled lines and hanging list markers (#2789) This was mostly written by codex under heavy guidance via test cases drawn from logged session data and fuzzing. It also uncovered some bugs in tui_markdown, which will in some cases split a list marker from the list item content. We're not addressing those bugs for now. --- codex-rs/tui/src/markdown.rs | 144 ++++++++++ codex-rs/tui/src/markdown_stream.rs | 317 +++++++++++++++++++++++ codex-rs/tui/src/streaming/controller.rs | 186 +++++++++++++ 3 files changed, 647 insertions(+) diff --git a/codex-rs/tui/src/markdown.rs b/codex-rs/tui/src/markdown.rs index 8adc8f3b..fcb9e774 100644 --- a/codex-rs/tui/src/markdown.rs +++ b/codex-rs/tui/src/markdown.rs @@ -435,4 +435,148 @@ mod tests { "Hi! How can I help with codex-rs today? Want me to explore the repo, run tests, or work on a specific change?" ); } + + #[test] + fn tui_markdown_splits_ordered_marker_and_text() { + // With marker and content on the same line, tui_markdown keeps it as one line + // even in the surrounding section context. + let rendered = tui_markdown::from_str("Loose vs. tight list items:\n1. Tight item\n"); + let lines: Vec = rendered + .lines + .iter() + .map(|l| { + l.spans + .iter() + .map(|s| s.content.clone()) + .collect::() + }) + .collect(); + assert!( + lines.iter().any(|w| w == "1. Tight item"), + "expected single line '1. Tight item' in context: {lines:?}" + ); + } + + #[test] + fn append_markdown_matches_tui_markdown_for_ordered_item() { + use codex_core::config_types::UriBasedFileOpener; + use std::path::Path; + let cwd = Path::new("/"); + let mut out = Vec::new(); + append_markdown_with_opener_and_cwd( + "1. Tight item\n", + &mut out, + UriBasedFileOpener::None, + cwd, + ); + let lines: Vec = out + .iter() + .map(|l| { + l.spans + .iter() + .map(|s| s.content.clone()) + .collect::() + }) + .collect(); + assert_eq!(lines, vec!["1. Tight item".to_string()]); + } + + #[test] + fn tui_markdown_shape_for_loose_tight_section() { + // Use the exact source from the session deltas used in tests. + let source = r#" +Loose vs. tight list items: +1. Tight item +2. Another tight item + +3. + Loose item +"#; + + let rendered = tui_markdown::from_str(source); + let lines: Vec = rendered + .lines + .iter() + .map(|l| { + l.spans + .iter() + .map(|s| s.content.clone()) + .collect::() + }) + .collect(); + // Join into a single string and assert the exact shape we observe + // from tui_markdown in this larger context (marker and content split). + let joined = { + let mut s = String::new(); + for (i, l) in lines.iter().enumerate() { + s.push_str(l); + if i + 1 < lines.len() { + s.push('\n'); + } + } + s + }; + let expected = r#"Loose vs. tight list items: + +1. +Tight item +2. +Another tight item +3. +Loose item"#; + assert_eq!( + joined, expected, + "unexpected tui_markdown shape: {joined:?}" + ); + } + + #[test] + fn split_text_and_fences_keeps_ordered_list_line_as_text() { + // No fences here; expect a single Text segment containing the full input. + let src = "Loose vs. tight list items:\n1. 
Tight item\n"; + let segs = super::split_text_and_fences(src); + assert_eq!( + segs.len(), + 1, + "expected single text segment, got {}", + segs.len() + ); + match &segs[0] { + super::Segment::Text(s) => assert_eq!(s, src), + _ => panic!("expected Text segment for non-fence input"), + } + } + + #[test] + fn append_markdown_keeps_ordered_list_line_unsplit_in_context() { + use codex_core::config_types::UriBasedFileOpener; + use std::path::Path; + let src = "Loose vs. tight list items:\n1. Tight item\n"; + let cwd = Path::new("/"); + let mut out = Vec::new(); + append_markdown_with_opener_and_cwd(src, &mut out, UriBasedFileOpener::None, cwd); + + let lines: Vec = out + .iter() + .map(|l| { + l.spans + .iter() + .map(|s| s.content.clone()) + .collect::() + }) + .collect(); + + // Expect to find the ordered list line rendered as a single line, + // not split into a marker-only line followed by the text. + assert!( + lines.iter().any(|s| s == "1. Tight item"), + "expected '1. Tight item' rendered as a single line; got: {lines:?}" + ); + assert!( + !lines + .windows(2) + .any(|w| w[0].trim_end() == "1." && w[1] == "Tight item"), + "did not expect a split into ['1.', 'Tight item']; got: {lines:?}" + ); + } } diff --git a/codex-rs/tui/src/markdown_stream.rs b/codex-rs/tui/src/markdown_stream.rs index af928cf8..5dc84cb1 100644 --- a/codex-rs/tui/src/markdown_stream.rs +++ b/codex-rs/tui/src/markdown_stream.rs @@ -65,6 +65,21 @@ impl MarkdownStreamCollector { { complete_line_count -= 1; } + // Heuristic: if the buffer ends with a double newline and the last non-blank + // rendered line looks like a list bullet with inline content (e.g., "- item"), + // defer committing that line. Subsequent context (e.g., another list item) + // can cause the renderer to split the bullet marker and text into separate + // logical lines ("- " then "item"), which would otherwise duplicate content. + if self.buffer.ends_with("\n\n") && complete_line_count > 0 { + let last = &rendered[complete_line_count - 1]; + let mut text = String::new(); + for s in &last.spans { + text.push_str(&s.content); + } + if text.starts_with("- ") && text.trim() != "-" { + complete_line_count = complete_line_count.saturating_sub(1); + } + } if !self.buffer.ends_with('\n') { complete_line_count = complete_line_count.saturating_sub(1); // If we're inside an unclosed fenced code block, also drop the @@ -72,6 +87,38 @@ impl MarkdownStreamCollector { if is_inside_unclosed_fence(&source) { complete_line_count = complete_line_count.saturating_sub(1); } + // If the next (incomplete) line appears to begin a list item, + // also defer the previous completed line because the renderer may + // retroactively treat it as part of the list (e.g., ordered list item 1). + if let Some(last_nl) = source.rfind('\n') { + let tail = &source[last_nl + 1..]; + if starts_with_list_marker(tail) { + complete_line_count = complete_line_count.saturating_sub(1); + } + } + } + + // Conservatively withhold trailing list-like lines (unordered or ordered) + // because streaming mid-item can cause the renderer to later split or + // restructure them (e.g., duplicating content or separating the marker). + // Only defers lines at the end of the out slice so previously committed + // lines remain stable. 
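+ // For example, a trailing "- item" bullet or a "2. Another tight item" ordered entry is
+ // treated as volatile by the loop below and held back; it is committed on a later pass,
+ // once it is no longer part of the trailing run of list-like lines (or when the stream ends).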
+ if complete_line_count > self.committed_line_count { + let mut safe_count = complete_line_count; + while safe_count > self.committed_line_count { + let l = &rendered[safe_count - 1]; + let mut text = String::new(); + for s in &l.spans { + text.push_str(&s.content); + } + let listish = is_potentially_volatile_list_line(&text); + if listish { + safe_count -= 1; + continue; + } + break; + } + complete_line_count = safe_count; } if self.committed_line_count >= complete_line_count { @@ -86,6 +133,20 @@ impl MarkdownStreamCollector { return Vec::new(); } + // Additional conservative hold-back: if exactly one short, plain word + // line would be emitted, defer it. This avoids committing a lone word + // that might become the first ordered-list item once the next delta + // arrives (e.g., next line starts with "2 " or "2. "). + if out_slice.len() == 1 { + let mut s = String::new(); + for sp in &out_slice[0].spans { + s.push_str(&sp.content); + } + if is_short_plain_word(&s) { + return Vec::new(); + } + } + let out = out_slice.to_vec(); self.committed_line_count = complete_line_count; out @@ -118,6 +179,75 @@ impl MarkdownStreamCollector { } } +#[inline] +fn is_potentially_volatile_list_line(text: &str) -> bool { + let t = text.trim_end(); + if t == "-" || t == "*" || t == "- " || t == "* " { + return true; + } + if t.starts_with("- ") || t.starts_with("* ") { + return true; + } + // ordered list like "1. " or "23. " + let mut it = t.chars().peekable(); + let mut saw_digit = false; + while let Some(&ch) = it.peek() { + if ch.is_ascii_digit() { + saw_digit = true; + it.next(); + continue; + } + break; + } + if saw_digit && it.peek() == Some(&'.') { + // consume '.' + it.next(); + if it.peek() == Some(&' ') { + return true; + } + } + false +} + +#[inline] +fn starts_with_list_marker(text: &str) -> bool { + let t = text.trim_start(); + if t.starts_with("- ") || t.starts_with("* ") || t.starts_with("-\t") || t.starts_with("*\t") { + return true; + } + // ordered list marker like "1 ", "1. ", "23 ", "23. " + let mut it = t.chars().peekable(); + let mut saw_digit = false; + while let Some(&ch) = it.peek() { + if ch.is_ascii_digit() { + saw_digit = true; + it.next(); + } else { + break; + } + } + if !saw_digit { + return false; + } + match it.peek() { + Some('.') => { + it.next(); + matches!(it.peek(), Some(' ')) + } + Some(' ') => true, + _ => false, + } +} + +#[inline] +fn is_short_plain_word(s: &str) -> bool { + let t = s.trim(); + if t.is_empty() || t.len() > 5 { + return false; + } + t.chars().all(|c| c.is_alphanumeric()) +} + /// fence helpers are provided by `crate::render::markdown_utils` #[cfg(test)] fn unwrap_markdown_language_fence_if_enabled(s: String) -> String { @@ -530,4 +660,191 @@ mod tests { "heading should not merge with paragraph: {texts:?}" ); } + + #[test] + fn loose_list_with_split_dashes_matches_full_render() { + let cfg = test_config(); + // Minimized failing sequence discovered by the helper: two chunks + // that still reproduce the mismatch. 
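+ // The bare trailing "-" is the interesting part: without the hold-back, the streamed
+ // output could commit it as a dangling "-" line that the full render never produces.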
+ let deltas = vec!["- item.\n\n", "-"]; + + let streamed = simulate_stream_markdown_for_tests(&deltas, true, &cfg); + let streamed_strs = lines_to_plain_strings(&streamed); + + let full: String = deltas.iter().copied().collect(); + let mut rendered_all: Vec> = Vec::new(); + crate::markdown::append_markdown(&full, &mut rendered_all, &cfg); + let rendered_all_strs = lines_to_plain_strings(&rendered_all); + + assert_eq!( + streamed_strs, rendered_all_strs, + "streamed output should match full render without dangling '-' lines" + ); + } + + #[test] + fn loose_vs_tight_list_items_streaming_matches_full() { + let cfg = test_config(); + // Deltas extracted from the session log around 2025-08-27T00:33:18.216Z + let deltas = vec![ + "\n\n", + "Loose", + " vs", + ".", + " tight", + " list", + " items", + ":\n", + "1", + ".", + " Tight", + " item", + "\n", + "2", + ".", + " Another", + " tight", + " item", + "\n\n", + "1", + ".", + " Loose", + " item", + " with", + " its", + " own", + " paragraph", + ".\n\n", + " ", + " This", + " paragraph", + " belongs", + " to", + " the", + " same", + " list", + " item", + ".\n\n", + "2", + ".", + " Second", + " loose", + " item", + " with", + " a", + " nested", + " list", + " after", + " a", + " blank", + " line", + ".\n\n", + " ", + " -", + " Nested", + " bullet", + " under", + " a", + " loose", + " item", + "\n", + " ", + " -", + " Another", + " nested", + " bullet", + "\n\n", + ]; + + let streamed = simulate_stream_markdown_for_tests(&deltas, true, &cfg); + let streamed_strs = lines_to_plain_strings(&streamed); + + // Compute a full render for diagnostics only. + let full: String = deltas.iter().copied().collect(); + let mut rendered_all: Vec> = Vec::new(); + crate::markdown::append_markdown(&full, &mut rendered_all, &cfg); + + // Also assert exact expected plain strings for clarity. + let expected = vec![ + "Loose vs. tight list items:".to_string(), + "".to_string(), + "1. ".to_string(), + "Tight item".to_string(), + "2. ".to_string(), + "Another tight item".to_string(), + "3. ".to_string(), + "Loose item with its own paragraph.".to_string(), + "".to_string(), + "This paragraph belongs to the same list item.".to_string(), + "4. ".to_string(), + "Second loose item with a nested list after a blank line.".to_string(), + " - Nested bullet under a loose item".to_string(), + " - Another nested bullet".to_string(), + ]; + assert_eq!( + streamed_strs, expected, + "expected exact rendered lines for loose/tight section" + ); + } + + // Targeted tests derived from fuzz findings. Each asserts streamed == full render. 
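+ // They exercise a bare "-" marker followed by a task-list item, bullet text split across
+ // deltas (the doubled-line case), and an ordered-list marker split from its text.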
+ + #[test] + fn fuzz_class_bare_dash_then_task_item() { + let cfg = test_config(); + // Case similar to: ["two\n", "- \n* [x] done "] + let deltas = vec!["two\n", "- \n* [x] done \n"]; + let streamed = simulate_stream_markdown_for_tests(&deltas, true, &cfg); + let streamed_strs = lines_to_plain_strings(&streamed); + let full: String = deltas.iter().copied().collect(); + let mut rendered: Vec> = Vec::new(); + crate::markdown::append_markdown(&full, &mut rendered, &cfg); + let rendered_strs = lines_to_plain_strings(&rendered); + assert_eq!(streamed_strs, rendered_strs); + } + + #[test] + fn fuzz_class_bullet_duplication_variant_1() { + let cfg = test_config(); + // Case similar to: ["aph.\n- let one\n- bull", "et two\n\n second paragraph "] + let deltas = vec!["aph.\n- let one\n- bull", "et two\n\n second paragraph \n"]; + let streamed = simulate_stream_markdown_for_tests(&deltas, true, &cfg); + let streamed_strs = lines_to_plain_strings(&streamed); + let full: String = deltas.iter().copied().collect(); + let mut rendered: Vec> = Vec::new(); + crate::markdown::append_markdown(&full, &mut rendered, &cfg); + let rendered_strs = lines_to_plain_strings(&rendered); + assert_eq!(streamed_strs, rendered_strs); + } + + #[test] + fn fuzz_class_bullet_duplication_variant_2() { + let cfg = test_config(); + // Case similar to: ["- e\n c", "e\n- bullet two\n\n second paragraph in bullet two\n"] + let deltas = vec![ + "- e\n c", + "e\n- bullet two\n\n second paragraph in bullet two\n", + ]; + let streamed = simulate_stream_markdown_for_tests(&deltas, true, &cfg); + let streamed_strs = lines_to_plain_strings(&streamed); + let full: String = deltas.iter().copied().collect(); + let mut rendered: Vec> = Vec::new(); + crate::markdown::append_markdown(&full, &mut rendered, &cfg); + let rendered_strs = lines_to_plain_strings(&rendered); + assert_eq!(streamed_strs, rendered_strs); + } + + #[test] + fn fuzz_class_ordered_list_split_weirdness() { + let cfg = test_config(); + // Case similar to: ["one\n2", " two\n- \n* [x] d"] + let deltas = vec!["one\n2", " two\n- \n* [x] d\n"]; + let streamed = simulate_stream_markdown_for_tests(&deltas, true, &cfg); + let streamed_strs = lines_to_plain_strings(&streamed); + let full: String = deltas.iter().copied().collect(); + let mut rendered: Vec> = Vec::new(); + crate::markdown::append_markdown(&full, &mut rendered, &cfg); + let rendered_strs = lines_to_plain_strings(&rendered); + assert_eq!(streamed_strs, rendered_strs); + } } diff --git a/codex-rs/tui/src/streaming/controller.rs b/codex-rs/tui/src/streaming/controller.rs index b9afa893..9c6db452 100644 --- a/codex-rs/tui/src/streaming/controller.rs +++ b/codex-rs/tui/src/streaming/controller.rs @@ -214,3 +214,189 @@ impl StreamController { self.finalize(true, sink) } } + +#[cfg(test)] +mod tests { + use super::*; + use codex_core::config::Config; + use codex_core::config::ConfigOverrides; + use std::cell::RefCell; + + fn test_config() -> Config { + let overrides = ConfigOverrides { + cwd: std::env::current_dir().ok(), + ..Default::default() + }; + match Config::load_with_cli_overrides(vec![], overrides) { + Ok(c) => c, + Err(e) => panic!("load test config: {e}"), + } + } + + struct TestSink { + pub lines: RefCell>>>, + } + impl TestSink { + fn new() -> Self { + Self { + lines: RefCell::new(Vec::new()), + } + } + } + impl HistorySink for TestSink { + fn insert_history(&self, lines: Vec>) { + self.lines.borrow_mut().push(lines); + } + fn start_commit_animation(&self) {} + fn stop_commit_animation(&self) {} + } + + fn 
lines_to_plain_strings(lines: &[ratatui::text::Line<'_>]) -> Vec { + lines + .iter() + .map(|l| { + l.spans + .iter() + .map(|s| s.content.clone()) + .collect::>() + .join("") + }) + .collect() + } + + #[test] + fn controller_loose_vs_tight_with_commit_ticks_matches_full() { + let cfg = test_config(); + let mut ctrl = StreamController::new(cfg.clone()); + let sink = TestSink::new(); + ctrl.begin(&sink); + + // Exact deltas from the session log (section: Loose vs. tight list items) + let deltas = vec![ + "\n\n", + "Loose", + " vs", + ".", + " tight", + " list", + " items", + ":\n", + "1", + ".", + " Tight", + " item", + "\n", + "2", + ".", + " Another", + " tight", + " item", + "\n\n", + "1", + ".", + " Loose", + " item", + " with", + " its", + " own", + " paragraph", + ".\n\n", + " ", + " This", + " paragraph", + " belongs", + " to", + " the", + " same", + " list", + " item", + ".\n\n", + "2", + ".", + " Second", + " loose", + " item", + " with", + " a", + " nested", + " list", + " after", + " a", + " blank", + " line", + ".\n\n", + " ", + " -", + " Nested", + " bullet", + " under", + " a", + " loose", + " item", + "\n", + " ", + " -", + " Another", + " nested", + " bullet", + "\n\n", + ]; + + // Simulate streaming with a commit tick attempt after each delta. + for d in &deltas { + ctrl.push_and_maybe_commit(d, &sink); + let _ = ctrl.on_commit_tick(&sink); + } + // Finalize and flush remaining lines now. + let _ = ctrl.finalize(true, &sink); + + // Flatten sink output and strip the header that the controller inserts (blank + "codex"). + let mut flat: Vec> = Vec::new(); + for batch in sink.lines.borrow().iter() { + for l in batch { + flat.push(l.clone()); + } + } + // Drop leading blank and header line if present. + if !flat.is_empty() && lines_to_plain_strings(&[flat[0].clone()])[0].is_empty() { + flat.remove(0); + } + if !flat.is_empty() { + let s0 = lines_to_plain_strings(&[flat[0].clone()])[0].clone(); + if s0 == "codex" { + flat.remove(0); + } + } + let streamed = lines_to_plain_strings(&flat); + + // Full render of the same source + let source: String = deltas.iter().copied().collect(); + let mut rendered: Vec> = Vec::new(); + crate::markdown::append_markdown(&source, &mut rendered, &cfg); + let rendered_strs = lines_to_plain_strings(&rendered); + + assert_eq!(streamed, rendered_strs); + + // Also assert exact expected plain strings for clarity. + let expected = vec![ + "Loose vs. tight list items:".to_string(), + "".to_string(), + "1. ".to_string(), + "Tight item".to_string(), + "2. ".to_string(), + "Another tight item".to_string(), + "3. ".to_string(), + "Loose item with its own paragraph.".to_string(), + "".to_string(), + "This paragraph belongs to the same list item.".to_string(), + "4. ".to_string(), + "Second loose item with a nested list after a blank line.".to_string(), + " - Nested bullet under a loose item".to_string(), + " - Another nested bullet".to_string(), + ]; + assert_eq!( + streamed, expected, + "expected exact rendered lines for loose/tight section" + ); + } +}