From 488a40211abd382cf5b7b9969886b02076189a88 Mon Sep 17 00:00:00 2001 From: Jeremy Rose <172423086+nornagon-openai@users.noreply.github.com> Date: Wed, 27 Aug 2025 13:55:59 -0700 Subject: [PATCH] fix (most) doubled lines and hanging list markers (#2789) This was mostly written by codex under heavy guidance via test cases drawn from logged session data and fuzzing. It also uncovered some bugs in tui_markdown, which will in some cases split a list marker from the list item content. We're not addressing those bugs for now. --- codex-rs/tui/src/markdown.rs | 144 ++++++++++ codex-rs/tui/src/markdown_stream.rs | 317 +++++++++++++++++++++++ codex-rs/tui/src/streaming/controller.rs | 186 +++++++++++++ 3 files changed, 647 insertions(+) diff --git a/codex-rs/tui/src/markdown.rs b/codex-rs/tui/src/markdown.rs index 8adc8f3b..fcb9e774 100644 --- a/codex-rs/tui/src/markdown.rs +++ b/codex-rs/tui/src/markdown.rs @@ -435,4 +435,148 @@ mod tests { "Hi! How can I help with codex-rs today? Want me to explore the repo, run tests, or work on a specific change?" ); } + + #[test] + fn tui_markdown_splits_ordered_marker_and_text() { + // With marker and content on the same line, tui_markdown keeps it as one line + // even in the surrounding section context. + let rendered = tui_markdown::from_str("Loose vs. tight list items:\n1. Tight item\n"); + let lines: Vec = rendered + .lines + .iter() + .map(|l| { + l.spans + .iter() + .map(|s| s.content.clone()) + .collect::() + }) + .collect(); + assert!( + lines.iter().any(|w| w == "1. Tight item"), + "expected single line '1. Tight item' in context: {lines:?}" + ); + } + + #[test] + fn append_markdown_matches_tui_markdown_for_ordered_item() { + use codex_core::config_types::UriBasedFileOpener; + use std::path::Path; + let cwd = Path::new("/"); + let mut out = Vec::new(); + append_markdown_with_opener_and_cwd( + "1. Tight item\n", + &mut out, + UriBasedFileOpener::None, + cwd, + ); + let lines: Vec = out + .iter() + .map(|l| { + l.spans + .iter() + .map(|s| s.content.clone()) + .collect::() + }) + .collect(); + assert_eq!(lines, vec!["1. Tight item".to_string()]); + } + + #[test] + fn tui_markdown_shape_for_loose_tight_section() { + // Use the exact source from the session deltas used in tests. + let source = r#" +Loose vs. tight list items: +1. Tight item +2. Another tight item + +3. + Loose item +"#; + + let rendered = tui_markdown::from_str(source); + let lines: Vec = rendered + .lines + .iter() + .map(|l| { + l.spans + .iter() + .map(|s| s.content.clone()) + .collect::() + }) + .collect(); + // Join into a single string and assert the exact shape we observe + // from tui_markdown in this larger context (marker and content split). + let joined = { + let mut s = String::new(); + for (i, l) in lines.iter().enumerate() { + s.push_str(l); + if i + 1 < lines.len() { + s.push('\n'); + } + } + s + }; + let expected = r#"Loose vs. tight list items: + +1. +Tight item +2. +Another tight item +3. +Loose item"#; + assert_eq!( + joined, expected, + "unexpected tui_markdown shape: {joined:?}" + ); + } + + #[test] + fn split_text_and_fences_keeps_ordered_list_line_as_text() { + // No fences here; expect a single Text segment containing the full input. + let src = "Loose vs. tight list items:\n1. 
Tight item\n"; + let segs = super::split_text_and_fences(src); + assert_eq!( + segs.len(), + 1, + "expected single text segment, got {}", + segs.len() + ); + match &segs[0] { + super::Segment::Text(s) => assert_eq!(s, src), + _ => panic!("expected Text segment for non-fence input"), + } + } + + #[test] + fn append_markdown_keeps_ordered_list_line_unsplit_in_context() { + use codex_core::config_types::UriBasedFileOpener; + use std::path::Path; + let src = "Loose vs. tight list items:\n1. Tight item\n"; + let cwd = Path::new("/"); + let mut out = Vec::new(); + append_markdown_with_opener_and_cwd(src, &mut out, UriBasedFileOpener::None, cwd); + + let lines: Vec = out + .iter() + .map(|l| { + l.spans + .iter() + .map(|s| s.content.clone()) + .collect::() + }) + .collect(); + + // Expect to find the ordered list line rendered as a single line, + // not split into a marker-only line followed by the text. + assert!( + lines.iter().any(|s| s == "1. Tight item"), + "expected '1. Tight item' rendered as a single line; got: {lines:?}" + ); + assert!( + !lines + .windows(2) + .any(|w| w[0].trim_end() == "1." && w[1] == "Tight item"), + "did not expect a split into ['1.', 'Tight item']; got: {lines:?}" + ); + } } diff --git a/codex-rs/tui/src/markdown_stream.rs b/codex-rs/tui/src/markdown_stream.rs index af928cf8..5dc84cb1 100644 --- a/codex-rs/tui/src/markdown_stream.rs +++ b/codex-rs/tui/src/markdown_stream.rs @@ -65,6 +65,21 @@ impl MarkdownStreamCollector { { complete_line_count -= 1; } + // Heuristic: if the buffer ends with a double newline and the last non-blank + // rendered line looks like a list bullet with inline content (e.g., "- item"), + // defer committing that line. Subsequent context (e.g., another list item) + // can cause the renderer to split the bullet marker and text into separate + // logical lines ("- " then "item"), which would otherwise duplicate content. + if self.buffer.ends_with("\n\n") && complete_line_count > 0 { + let last = &rendered[complete_line_count - 1]; + let mut text = String::new(); + for s in &last.spans { + text.push_str(&s.content); + } + if text.starts_with("- ") && text.trim() != "-" { + complete_line_count = complete_line_count.saturating_sub(1); + } + } if !self.buffer.ends_with('\n') { complete_line_count = complete_line_count.saturating_sub(1); // If we're inside an unclosed fenced code block, also drop the @@ -72,6 +87,38 @@ impl MarkdownStreamCollector { if is_inside_unclosed_fence(&source) { complete_line_count = complete_line_count.saturating_sub(1); } + // If the next (incomplete) line appears to begin a list item, + // also defer the previous completed line because the renderer may + // retroactively treat it as part of the list (e.g., ordered list item 1). + if let Some(last_nl) = source.rfind('\n') { + let tail = &source[last_nl + 1..]; + if starts_with_list_marker(tail) { + complete_line_count = complete_line_count.saturating_sub(1); + } + } + } + + // Conservatively withhold trailing list-like lines (unordered or ordered) + // because streaming mid-item can cause the renderer to later split or + // restructure them (e.g., duplicating content or separating the marker). + // Only defers lines at the end of the out slice so previously committed + // lines remain stable. 
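+ // For example, a trailing "- item" bullet or a "2. Another tight item" ordered entry is
+ // treated as volatile by the loop below and held back; it is committed on a later pass,
+ // once it is no longer part of the trailing run of list-like lines (or when the stream ends).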
+ if complete_line_count > self.committed_line_count { + let mut safe_count = complete_line_count; + while safe_count > self.committed_line_count { + let l = &rendered[safe_count - 1]; + let mut text = String::new(); + for s in &l.spans { + text.push_str(&s.content); + } + let listish = is_potentially_volatile_list_line(&text); + if listish { + safe_count -= 1; + continue; + } + break; + } + complete_line_count = safe_count; } if self.committed_line_count >= complete_line_count { @@ -86,6 +133,20 @@ impl MarkdownStreamCollector { return Vec::new(); } + // Additional conservative hold-back: if exactly one short, plain word + // line would be emitted, defer it. This avoids committing a lone word + // that might become the first ordered-list item once the next delta + // arrives (e.g., next line starts with "2 " or "2. "). + if out_slice.len() == 1 { + let mut s = String::new(); + for sp in &out_slice[0].spans { + s.push_str(&sp.content); + } + if is_short_plain_word(&s) { + return Vec::new(); + } + } + let out = out_slice.to_vec(); self.committed_line_count = complete_line_count; out @@ -118,6 +179,75 @@ impl MarkdownStreamCollector { } } +#[inline] +fn is_potentially_volatile_list_line(text: &str) -> bool { + let t = text.trim_end(); + if t == "-" || t == "*" || t == "- " || t == "* " { + return true; + } + if t.starts_with("- ") || t.starts_with("* ") { + return true; + } + // ordered list like "1. " or "23. " + let mut it = t.chars().peekable(); + let mut saw_digit = false; + while let Some(&ch) = it.peek() { + if ch.is_ascii_digit() { + saw_digit = true; + it.next(); + continue; + } + break; + } + if saw_digit && it.peek() == Some(&'.') { + // consume '.' + it.next(); + if it.peek() == Some(&' ') { + return true; + } + } + false +} + +#[inline] +fn starts_with_list_marker(text: &str) -> bool { + let t = text.trim_start(); + if t.starts_with("- ") || t.starts_with("* ") || t.starts_with("-\t") || t.starts_with("*\t") { + return true; + } + // ordered list marker like "1 ", "1. ", "23 ", "23. " + let mut it = t.chars().peekable(); + let mut saw_digit = false; + while let Some(&ch) = it.peek() { + if ch.is_ascii_digit() { + saw_digit = true; + it.next(); + } else { + break; + } + } + if !saw_digit { + return false; + } + match it.peek() { + Some('.') => { + it.next(); + matches!(it.peek(), Some(' ')) + } + Some(' ') => true, + _ => false, + } +} + +#[inline] +fn is_short_plain_word(s: &str) -> bool { + let t = s.trim(); + if t.is_empty() || t.len() > 5 { + return false; + } + t.chars().all(|c| c.is_alphanumeric()) +} + /// fence helpers are provided by `crate::render::markdown_utils` #[cfg(test)] fn unwrap_markdown_language_fence_if_enabled(s: String) -> String { @@ -530,4 +660,191 @@ mod tests { "heading should not merge with paragraph: {texts:?}" ); } + + #[test] + fn loose_list_with_split_dashes_matches_full_render() { + let cfg = test_config(); + // Minimized failing sequence discovered by the helper: two chunks + // that still reproduce the mismatch. 
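+ // The bare trailing "-" is the interesting part: without the hold-back, the streamed
+ // output could commit it as a dangling "-" line that the full render never produces.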
+ let deltas = vec!["- item.\n\n", "-"]; + + let streamed = simulate_stream_markdown_for_tests(&deltas, true, &cfg); + let streamed_strs = lines_to_plain_strings(&streamed); + + let full: String = deltas.iter().copied().collect(); + let mut rendered_all: Vec> = Vec::new(); + crate::markdown::append_markdown(&full, &mut rendered_all, &cfg); + let rendered_all_strs = lines_to_plain_strings(&rendered_all); + + assert_eq!( + streamed_strs, rendered_all_strs, + "streamed output should match full render without dangling '-' lines" + ); + } + + #[test] + fn loose_vs_tight_list_items_streaming_matches_full() { + let cfg = test_config(); + // Deltas extracted from the session log around 2025-08-27T00:33:18.216Z + let deltas = vec![ + "\n\n", + "Loose", + " vs", + ".", + " tight", + " list", + " items", + ":\n", + "1", + ".", + " Tight", + " item", + "\n", + "2", + ".", + " Another", + " tight", + " item", + "\n\n", + "1", + ".", + " Loose", + " item", + " with", + " its", + " own", + " paragraph", + ".\n\n", + " ", + " This", + " paragraph", + " belongs", + " to", + " the", + " same", + " list", + " item", + ".\n\n", + "2", + ".", + " Second", + " loose", + " item", + " with", + " a", + " nested", + " list", + " after", + " a", + " blank", + " line", + ".\n\n", + " ", + " -", + " Nested", + " bullet", + " under", + " a", + " loose", + " item", + "\n", + " ", + " -", + " Another", + " nested", + " bullet", + "\n\n", + ]; + + let streamed = simulate_stream_markdown_for_tests(&deltas, true, &cfg); + let streamed_strs = lines_to_plain_strings(&streamed); + + // Compute a full render for diagnostics only. + let full: String = deltas.iter().copied().collect(); + let mut rendered_all: Vec> = Vec::new(); + crate::markdown::append_markdown(&full, &mut rendered_all, &cfg); + + // Also assert exact expected plain strings for clarity. + let expected = vec![ + "Loose vs. tight list items:".to_string(), + "".to_string(), + "1. ".to_string(), + "Tight item".to_string(), + "2. ".to_string(), + "Another tight item".to_string(), + "3. ".to_string(), + "Loose item with its own paragraph.".to_string(), + "".to_string(), + "This paragraph belongs to the same list item.".to_string(), + "4. ".to_string(), + "Second loose item with a nested list after a blank line.".to_string(), + " - Nested bullet under a loose item".to_string(), + " - Another nested bullet".to_string(), + ]; + assert_eq!( + streamed_strs, expected, + "expected exact rendered lines for loose/tight section" + ); + } + + // Targeted tests derived from fuzz findings. Each asserts streamed == full render. 
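+ // They exercise a bare "-" marker followed by a task-list item, bullet text split across
+ // deltas (the doubled-line case), and an ordered-list marker split from its text.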
+ + #[test] + fn fuzz_class_bare_dash_then_task_item() { + let cfg = test_config(); + // Case similar to: ["two\n", "- \n* [x] done "] + let deltas = vec!["two\n", "- \n* [x] done \n"]; + let streamed = simulate_stream_markdown_for_tests(&deltas, true, &cfg); + let streamed_strs = lines_to_plain_strings(&streamed); + let full: String = deltas.iter().copied().collect(); + let mut rendered: Vec> = Vec::new(); + crate::markdown::append_markdown(&full, &mut rendered, &cfg); + let rendered_strs = lines_to_plain_strings(&rendered); + assert_eq!(streamed_strs, rendered_strs); + } + + #[test] + fn fuzz_class_bullet_duplication_variant_1() { + let cfg = test_config(); + // Case similar to: ["aph.\n- let one\n- bull", "et two\n\n second paragraph "] + let deltas = vec!["aph.\n- let one\n- bull", "et two\n\n second paragraph \n"]; + let streamed = simulate_stream_markdown_for_tests(&deltas, true, &cfg); + let streamed_strs = lines_to_plain_strings(&streamed); + let full: String = deltas.iter().copied().collect(); + let mut rendered: Vec> = Vec::new(); + crate::markdown::append_markdown(&full, &mut rendered, &cfg); + let rendered_strs = lines_to_plain_strings(&rendered); + assert_eq!(streamed_strs, rendered_strs); + } + + #[test] + fn fuzz_class_bullet_duplication_variant_2() { + let cfg = test_config(); + // Case similar to: ["- e\n c", "e\n- bullet two\n\n second paragraph in bullet two\n"] + let deltas = vec![ + "- e\n c", + "e\n- bullet two\n\n second paragraph in bullet two\n", + ]; + let streamed = simulate_stream_markdown_for_tests(&deltas, true, &cfg); + let streamed_strs = lines_to_plain_strings(&streamed); + let full: String = deltas.iter().copied().collect(); + let mut rendered: Vec> = Vec::new(); + crate::markdown::append_markdown(&full, &mut rendered, &cfg); + let rendered_strs = lines_to_plain_strings(&rendered); + assert_eq!(streamed_strs, rendered_strs); + } + + #[test] + fn fuzz_class_ordered_list_split_weirdness() { + let cfg = test_config(); + // Case similar to: ["one\n2", " two\n- \n* [x] d"] + let deltas = vec!["one\n2", " two\n- \n* [x] d\n"]; + let streamed = simulate_stream_markdown_for_tests(&deltas, true, &cfg); + let streamed_strs = lines_to_plain_strings(&streamed); + let full: String = deltas.iter().copied().collect(); + let mut rendered: Vec> = Vec::new(); + crate::markdown::append_markdown(&full, &mut rendered, &cfg); + let rendered_strs = lines_to_plain_strings(&rendered); + assert_eq!(streamed_strs, rendered_strs); + } } diff --git a/codex-rs/tui/src/streaming/controller.rs b/codex-rs/tui/src/streaming/controller.rs index b9afa893..9c6db452 100644 --- a/codex-rs/tui/src/streaming/controller.rs +++ b/codex-rs/tui/src/streaming/controller.rs @@ -214,3 +214,189 @@ impl StreamController { self.finalize(true, sink) } } + +#[cfg(test)] +mod tests { + use super::*; + use codex_core::config::Config; + use codex_core::config::ConfigOverrides; + use std::cell::RefCell; + + fn test_config() -> Config { + let overrides = ConfigOverrides { + cwd: std::env::current_dir().ok(), + ..Default::default() + }; + match Config::load_with_cli_overrides(vec![], overrides) { + Ok(c) => c, + Err(e) => panic!("load test config: {e}"), + } + } + + struct TestSink { + pub lines: RefCell>>>, + } + impl TestSink { + fn new() -> Self { + Self { + lines: RefCell::new(Vec::new()), + } + } + } + impl HistorySink for TestSink { + fn insert_history(&self, lines: Vec>) { + self.lines.borrow_mut().push(lines); + } + fn start_commit_animation(&self) {} + fn stop_commit_animation(&self) {} + } + + fn 
lines_to_plain_strings(lines: &[ratatui::text::Line<'_>]) -> Vec { + lines + .iter() + .map(|l| { + l.spans + .iter() + .map(|s| s.content.clone()) + .collect::>() + .join("") + }) + .collect() + } + + #[test] + fn controller_loose_vs_tight_with_commit_ticks_matches_full() { + let cfg = test_config(); + let mut ctrl = StreamController::new(cfg.clone()); + let sink = TestSink::new(); + ctrl.begin(&sink); + + // Exact deltas from the session log (section: Loose vs. tight list items) + let deltas = vec![ + "\n\n", + "Loose", + " vs", + ".", + " tight", + " list", + " items", + ":\n", + "1", + ".", + " Tight", + " item", + "\n", + "2", + ".", + " Another", + " tight", + " item", + "\n\n", + "1", + ".", + " Loose", + " item", + " with", + " its", + " own", + " paragraph", + ".\n\n", + " ", + " This", + " paragraph", + " belongs", + " to", + " the", + " same", + " list", + " item", + ".\n\n", + "2", + ".", + " Second", + " loose", + " item", + " with", + " a", + " nested", + " list", + " after", + " a", + " blank", + " line", + ".\n\n", + " ", + " -", + " Nested", + " bullet", + " under", + " a", + " loose", + " item", + "\n", + " ", + " -", + " Another", + " nested", + " bullet", + "\n\n", + ]; + + // Simulate streaming with a commit tick attempt after each delta. + for d in &deltas { + ctrl.push_and_maybe_commit(d, &sink); + let _ = ctrl.on_commit_tick(&sink); + } + // Finalize and flush remaining lines now. + let _ = ctrl.finalize(true, &sink); + + // Flatten sink output and strip the header that the controller inserts (blank + "codex"). + let mut flat: Vec> = Vec::new(); + for batch in sink.lines.borrow().iter() { + for l in batch { + flat.push(l.clone()); + } + } + // Drop leading blank and header line if present. + if !flat.is_empty() && lines_to_plain_strings(&[flat[0].clone()])[0].is_empty() { + flat.remove(0); + } + if !flat.is_empty() { + let s0 = lines_to_plain_strings(&[flat[0].clone()])[0].clone(); + if s0 == "codex" { + flat.remove(0); + } + } + let streamed = lines_to_plain_strings(&flat); + + // Full render of the same source + let source: String = deltas.iter().copied().collect(); + let mut rendered: Vec> = Vec::new(); + crate::markdown::append_markdown(&source, &mut rendered, &cfg); + let rendered_strs = lines_to_plain_strings(&rendered); + + assert_eq!(streamed, rendered_strs); + + // Also assert exact expected plain strings for clarity. + let expected = vec![ + "Loose vs. tight list items:".to_string(), + "".to_string(), + "1. ".to_string(), + "Tight item".to_string(), + "2. ".to_string(), + "Another tight item".to_string(), + "3. ".to_string(), + "Loose item with its own paragraph.".to_string(), + "".to_string(), + "This paragraph belongs to the same list item.".to_string(), + "4. ".to_string(), + "Second loose item with a nested list after a blank line.".to_string(), + " - Nested bullet under a loose item".to_string(), + " - Another nested bullet".to_string(), + ]; + assert_eq!( + streamed, expected, + "expected exact rendered lines for loose/tight section" + ); + } +}