fix (most) doubled lines and hanging list markers (#2789)

This was mostly written by codex under heavy guidance via test cases
drawn from logged session data and fuzzing. It also uncovered some bugs
in tui_markdown, which will in some cases split a list marker from the
list item content. We're not addressing those bugs for now.
This commit is contained in:
Jeremy Rose
2025-08-27 13:55:59 -07:00
committed by GitHub
parent 903178eeeb
commit 488a40211a
3 changed files with 647 additions and 0 deletions

View File

@@ -435,4 +435,148 @@ mod tests {
"Hi! How can I help with codex-rs today? Want me to explore the repo, run tests, or work on a specific change?"
);
}
#[test]
fn tui_markdown_splits_ordered_marker_and_text() {
// With marker and content on the same line, tui_markdown keeps it as one line
// even in the surrounding section context.
let rendered = tui_markdown::from_str("Loose vs. tight list items:\n1. Tight item\n");
let lines: Vec<String> = rendered
.lines
.iter()
.map(|l| {
l.spans
.iter()
.map(|s| s.content.clone())
.collect::<String>()
})
.collect();
assert!(
lines.iter().any(|w| w == "1. Tight item"),
"expected single line '1. Tight item' in context: {lines:?}"
);
}
#[test]
fn append_markdown_matches_tui_markdown_for_ordered_item() {
use codex_core::config_types::UriBasedFileOpener;
use std::path::Path;
let cwd = Path::new("/");
let mut out = Vec::new();
append_markdown_with_opener_and_cwd(
"1. Tight item\n",
&mut out,
UriBasedFileOpener::None,
cwd,
);
let lines: Vec<String> = out
.iter()
.map(|l| {
l.spans
.iter()
.map(|s| s.content.clone())
.collect::<String>()
})
.collect();
assert_eq!(lines, vec!["1. Tight item".to_string()]);
}
#[test]
fn tui_markdown_shape_for_loose_tight_section() {
// Use the exact source from the session deltas used in tests.
let source = r#"
Loose vs. tight list items:
1. Tight item
2. Another tight item
3.
Loose item
"#;
let rendered = tui_markdown::from_str(source);
let lines: Vec<String> = rendered
.lines
.iter()
.map(|l| {
l.spans
.iter()
.map(|s| s.content.clone())
.collect::<String>()
})
.collect();
// Join into a single string and assert the exact shape we observe
// from tui_markdown in this larger context (marker and content split).
let joined = {
let mut s = String::new();
for (i, l) in lines.iter().enumerate() {
s.push_str(l);
if i + 1 < lines.len() {
s.push('\n');
}
}
s
};
let expected = r#"Loose vs. tight list items:
1.
Tight item
2.
Another tight item
3.
Loose item"#;
assert_eq!(
joined, expected,
"unexpected tui_markdown shape: {joined:?}"
);
}
#[test]
fn split_text_and_fences_keeps_ordered_list_line_as_text() {
// No fences here; expect a single Text segment containing the full input.
let src = "Loose vs. tight list items:\n1. Tight item\n";
let segs = super::split_text_and_fences(src);
assert_eq!(
segs.len(),
1,
"expected single text segment, got {}",
segs.len()
);
match &segs[0] {
super::Segment::Text(s) => assert_eq!(s, src),
_ => panic!("expected Text segment for non-fence input"),
}
}
#[test]
fn append_markdown_keeps_ordered_list_line_unsplit_in_context() {
use codex_core::config_types::UriBasedFileOpener;
use std::path::Path;
let src = "Loose vs. tight list items:\n1. Tight item\n";
let cwd = Path::new("/");
let mut out = Vec::new();
append_markdown_with_opener_and_cwd(src, &mut out, UriBasedFileOpener::None, cwd);
let lines: Vec<String> = out
.iter()
.map(|l| {
l.spans
.iter()
.map(|s| s.content.clone())
.collect::<String>()
})
.collect();
// Expect to find the ordered list line rendered as a single line,
// not split into a marker-only line followed by the text.
assert!(
lines.iter().any(|s| s == "1. Tight item"),
"expected '1. Tight item' rendered as a single line; got: {lines:?}"
);
assert!(
!lines
.windows(2)
.any(|w| w[0].trim_end() == "1." && w[1] == "Tight item"),
"did not expect a split into ['1.', 'Tight item']; got: {lines:?}"
);
}
}

View File

@@ -65,6 +65,21 @@ impl MarkdownStreamCollector {
{
complete_line_count -= 1;
}
// Heuristic: if the buffer ends with a double newline and the last non-blank
// rendered line looks like a list bullet with inline content (e.g., "- item"),
// defer committing that line. Subsequent context (e.g., another list item)
// can cause the renderer to split the bullet marker and text into separate
// logical lines ("- " then "item"), which would otherwise duplicate content.
if self.buffer.ends_with("\n\n") && complete_line_count > 0 {
let last = &rendered[complete_line_count - 1];
let mut text = String::new();
for s in &last.spans {
text.push_str(&s.content);
}
if text.starts_with("- ") && text.trim() != "-" {
complete_line_count = complete_line_count.saturating_sub(1);
}
}
if !self.buffer.ends_with('\n') {
complete_line_count = complete_line_count.saturating_sub(1);
// If we're inside an unclosed fenced code block, also drop the
@@ -72,6 +87,38 @@ impl MarkdownStreamCollector {
if is_inside_unclosed_fence(&source) {
complete_line_count = complete_line_count.saturating_sub(1);
}
// If the next (incomplete) line appears to begin a list item,
// also defer the previous completed line because the renderer may
// retroactively treat it as part of the list (e.g., ordered list item 1).
if let Some(last_nl) = source.rfind('\n') {
let tail = &source[last_nl + 1..];
if starts_with_list_marker(tail) {
complete_line_count = complete_line_count.saturating_sub(1);
}
}
}
// Conservatively withhold trailing list-like lines (unordered or ordered)
// because streaming mid-item can cause the renderer to later split or
// restructure them (e.g., duplicating content or separating the marker).
// Only defers lines at the end of the out slice so previously committed
// lines remain stable.
if complete_line_count > self.committed_line_count {
let mut safe_count = complete_line_count;
while safe_count > self.committed_line_count {
let l = &rendered[safe_count - 1];
let mut text = String::new();
for s in &l.spans {
text.push_str(&s.content);
}
let listish = is_potentially_volatile_list_line(&text);
if listish {
safe_count -= 1;
continue;
}
break;
}
complete_line_count = safe_count;
}
if self.committed_line_count >= complete_line_count {
@@ -86,6 +133,20 @@ impl MarkdownStreamCollector {
return Vec::new();
}
// Additional conservative hold-back: if exactly one short, plain word
// line would be emitted, defer it. This avoids committing a lone word
// that might become the first ordered-list item once the next delta
// arrives (e.g., next line starts with "2 " or "2. ").
if out_slice.len() == 1 {
let mut s = String::new();
for sp in &out_slice[0].spans {
s.push_str(&sp.content);
}
if is_short_plain_word(&s) {
return Vec::new();
}
}
let out = out_slice.to_vec();
self.committed_line_count = complete_line_count;
out
@@ -118,6 +179,75 @@ impl MarkdownStreamCollector {
}
}
#[inline]
fn is_potentially_volatile_list_line(text: &str) -> bool {
let t = text.trim_end();
if t == "-" || t == "*" || t == "- " || t == "* " {
return true;
}
if t.starts_with("- ") || t.starts_with("* ") {
return true;
}
// ordered list like "1. " or "23. "
let mut it = t.chars().peekable();
let mut saw_digit = false;
while let Some(&ch) = it.peek() {
if ch.is_ascii_digit() {
saw_digit = true;
it.next();
continue;
}
break;
}
if saw_digit && it.peek() == Some(&'.') {
// consume '.'
it.next();
if it.peek() == Some(&' ') {
return true;
}
}
false
}
#[inline]
fn starts_with_list_marker(text: &str) -> bool {
let t = text.trim_start();
if t.starts_with("- ") || t.starts_with("* ") || t.starts_with("-\t") || t.starts_with("*\t") {
return true;
}
// ordered list marker like "1 ", "1. ", "23 ", "23. "
let mut it = t.chars().peekable();
let mut saw_digit = false;
while let Some(&ch) = it.peek() {
if ch.is_ascii_digit() {
saw_digit = true;
it.next();
} else {
break;
}
}
if !saw_digit {
return false;
}
match it.peek() {
Some('.') => {
it.next();
matches!(it.peek(), Some(' '))
}
Some(' ') => true,
_ => false,
}
}
#[inline]
fn is_short_plain_word(s: &str) -> bool {
let t = s.trim();
if t.is_empty() || t.len() > 5 {
return false;
}
t.chars().all(|c| c.is_alphanumeric())
}
/// fence helpers are provided by `crate::render::markdown_utils`
#[cfg(test)]
fn unwrap_markdown_language_fence_if_enabled(s: String) -> String {
@@ -530,4 +660,191 @@ mod tests {
"heading should not merge with paragraph: {texts:?}"
);
}
#[test]
fn loose_list_with_split_dashes_matches_full_render() {
let cfg = test_config();
// Minimized failing sequence discovered by the helper: two chunks
// that still reproduce the mismatch.
let deltas = vec!["- item.\n\n", "-"];
let streamed = simulate_stream_markdown_for_tests(&deltas, true, &cfg);
let streamed_strs = lines_to_plain_strings(&streamed);
let full: String = deltas.iter().copied().collect();
let mut rendered_all: Vec<ratatui::text::Line<'static>> = Vec::new();
crate::markdown::append_markdown(&full, &mut rendered_all, &cfg);
let rendered_all_strs = lines_to_plain_strings(&rendered_all);
assert_eq!(
streamed_strs, rendered_all_strs,
"streamed output should match full render without dangling '-' lines"
);
}
#[test]
fn loose_vs_tight_list_items_streaming_matches_full() {
let cfg = test_config();
// Deltas extracted from the session log around 2025-08-27T00:33:18.216Z
let deltas = vec![
"\n\n",
"Loose",
" vs",
".",
" tight",
" list",
" items",
":\n",
"1",
".",
" Tight",
" item",
"\n",
"2",
".",
" Another",
" tight",
" item",
"\n\n",
"1",
".",
" Loose",
" item",
" with",
" its",
" own",
" paragraph",
".\n\n",
" ",
" This",
" paragraph",
" belongs",
" to",
" the",
" same",
" list",
" item",
".\n\n",
"2",
".",
" Second",
" loose",
" item",
" with",
" a",
" nested",
" list",
" after",
" a",
" blank",
" line",
".\n\n",
" ",
" -",
" Nested",
" bullet",
" under",
" a",
" loose",
" item",
"\n",
" ",
" -",
" Another",
" nested",
" bullet",
"\n\n",
];
let streamed = simulate_stream_markdown_for_tests(&deltas, true, &cfg);
let streamed_strs = lines_to_plain_strings(&streamed);
// Compute a full render for diagnostics only.
let full: String = deltas.iter().copied().collect();
let mut rendered_all: Vec<ratatui::text::Line<'static>> = Vec::new();
crate::markdown::append_markdown(&full, &mut rendered_all, &cfg);
// Also assert exact expected plain strings for clarity.
let expected = vec![
"Loose vs. tight list items:".to_string(),
"".to_string(),
"1. ".to_string(),
"Tight item".to_string(),
"2. ".to_string(),
"Another tight item".to_string(),
"3. ".to_string(),
"Loose item with its own paragraph.".to_string(),
"".to_string(),
"This paragraph belongs to the same list item.".to_string(),
"4. ".to_string(),
"Second loose item with a nested list after a blank line.".to_string(),
" - Nested bullet under a loose item".to_string(),
" - Another nested bullet".to_string(),
];
assert_eq!(
streamed_strs, expected,
"expected exact rendered lines for loose/tight section"
);
}
// Targeted tests derived from fuzz findings. Each asserts streamed == full render.
#[test]
fn fuzz_class_bare_dash_then_task_item() {
let cfg = test_config();
// Case similar to: ["two\n", "- \n* [x] done "]
let deltas = vec!["two\n", "- \n* [x] done \n"];
let streamed = simulate_stream_markdown_for_tests(&deltas, true, &cfg);
let streamed_strs = lines_to_plain_strings(&streamed);
let full: String = deltas.iter().copied().collect();
let mut rendered: Vec<ratatui::text::Line<'static>> = Vec::new();
crate::markdown::append_markdown(&full, &mut rendered, &cfg);
let rendered_strs = lines_to_plain_strings(&rendered);
assert_eq!(streamed_strs, rendered_strs);
}
#[test]
fn fuzz_class_bullet_duplication_variant_1() {
let cfg = test_config();
// Case similar to: ["aph.\n- let one\n- bull", "et two\n\n second paragraph "]
let deltas = vec!["aph.\n- let one\n- bull", "et two\n\n second paragraph \n"];
let streamed = simulate_stream_markdown_for_tests(&deltas, true, &cfg);
let streamed_strs = lines_to_plain_strings(&streamed);
let full: String = deltas.iter().copied().collect();
let mut rendered: Vec<ratatui::text::Line<'static>> = Vec::new();
crate::markdown::append_markdown(&full, &mut rendered, &cfg);
let rendered_strs = lines_to_plain_strings(&rendered);
assert_eq!(streamed_strs, rendered_strs);
}
#[test]
fn fuzz_class_bullet_duplication_variant_2() {
let cfg = test_config();
// Case similar to: ["- e\n c", "e\n- bullet two\n\n second paragraph in bullet two\n"]
let deltas = vec![
"- e\n c",
"e\n- bullet two\n\n second paragraph in bullet two\n",
];
let streamed = simulate_stream_markdown_for_tests(&deltas, true, &cfg);
let streamed_strs = lines_to_plain_strings(&streamed);
let full: String = deltas.iter().copied().collect();
let mut rendered: Vec<ratatui::text::Line<'static>> = Vec::new();
crate::markdown::append_markdown(&full, &mut rendered, &cfg);
let rendered_strs = lines_to_plain_strings(&rendered);
assert_eq!(streamed_strs, rendered_strs);
}
#[test]
fn fuzz_class_ordered_list_split_weirdness() {
let cfg = test_config();
// Case similar to: ["one\n2", " two\n- \n* [x] d"]
let deltas = vec!["one\n2", " two\n- \n* [x] d\n"];
let streamed = simulate_stream_markdown_for_tests(&deltas, true, &cfg);
let streamed_strs = lines_to_plain_strings(&streamed);
let full: String = deltas.iter().copied().collect();
let mut rendered: Vec<ratatui::text::Line<'static>> = Vec::new();
crate::markdown::append_markdown(&full, &mut rendered, &cfg);
let rendered_strs = lines_to_plain_strings(&rendered);
assert_eq!(streamed_strs, rendered_strs);
}
}

View File

@@ -214,3 +214,189 @@ impl StreamController {
self.finalize(true, sink)
}
}
#[cfg(test)]
mod tests {
use super::*;
use codex_core::config::Config;
use codex_core::config::ConfigOverrides;
use std::cell::RefCell;
fn test_config() -> Config {
let overrides = ConfigOverrides {
cwd: std::env::current_dir().ok(),
..Default::default()
};
match Config::load_with_cli_overrides(vec![], overrides) {
Ok(c) => c,
Err(e) => panic!("load test config: {e}"),
}
}
struct TestSink {
pub lines: RefCell<Vec<Vec<Line<'static>>>>,
}
impl TestSink {
fn new() -> Self {
Self {
lines: RefCell::new(Vec::new()),
}
}
}
impl HistorySink for TestSink {
fn insert_history(&self, lines: Vec<Line<'static>>) {
self.lines.borrow_mut().push(lines);
}
fn start_commit_animation(&self) {}
fn stop_commit_animation(&self) {}
}
fn lines_to_plain_strings(lines: &[ratatui::text::Line<'_>]) -> Vec<String> {
lines
.iter()
.map(|l| {
l.spans
.iter()
.map(|s| s.content.clone())
.collect::<Vec<_>>()
.join("")
})
.collect()
}
#[test]
fn controller_loose_vs_tight_with_commit_ticks_matches_full() {
let cfg = test_config();
let mut ctrl = StreamController::new(cfg.clone());
let sink = TestSink::new();
ctrl.begin(&sink);
// Exact deltas from the session log (section: Loose vs. tight list items)
let deltas = vec![
"\n\n",
"Loose",
" vs",
".",
" tight",
" list",
" items",
":\n",
"1",
".",
" Tight",
" item",
"\n",
"2",
".",
" Another",
" tight",
" item",
"\n\n",
"1",
".",
" Loose",
" item",
" with",
" its",
" own",
" paragraph",
".\n\n",
" ",
" This",
" paragraph",
" belongs",
" to",
" the",
" same",
" list",
" item",
".\n\n",
"2",
".",
" Second",
" loose",
" item",
" with",
" a",
" nested",
" list",
" after",
" a",
" blank",
" line",
".\n\n",
" ",
" -",
" Nested",
" bullet",
" under",
" a",
" loose",
" item",
"\n",
" ",
" -",
" Another",
" nested",
" bullet",
"\n\n",
];
// Simulate streaming with a commit tick attempt after each delta.
for d in &deltas {
ctrl.push_and_maybe_commit(d, &sink);
let _ = ctrl.on_commit_tick(&sink);
}
// Finalize and flush remaining lines now.
let _ = ctrl.finalize(true, &sink);
// Flatten sink output and strip the header that the controller inserts (blank + "codex").
let mut flat: Vec<ratatui::text::Line<'static>> = Vec::new();
for batch in sink.lines.borrow().iter() {
for l in batch {
flat.push(l.clone());
}
}
// Drop leading blank and header line if present.
if !flat.is_empty() && lines_to_plain_strings(&[flat[0].clone()])[0].is_empty() {
flat.remove(0);
}
if !flat.is_empty() {
let s0 = lines_to_plain_strings(&[flat[0].clone()])[0].clone();
if s0 == "codex" {
flat.remove(0);
}
}
let streamed = lines_to_plain_strings(&flat);
// Full render of the same source
let source: String = deltas.iter().copied().collect();
let mut rendered: Vec<ratatui::text::Line<'static>> = Vec::new();
crate::markdown::append_markdown(&source, &mut rendered, &cfg);
let rendered_strs = lines_to_plain_strings(&rendered);
assert_eq!(streamed, rendered_strs);
// Also assert exact expected plain strings for clarity.
let expected = vec![
"Loose vs. tight list items:".to_string(),
"".to_string(),
"1. ".to_string(),
"Tight item".to_string(),
"2. ".to_string(),
"Another tight item".to_string(),
"3. ".to_string(),
"Loose item with its own paragraph.".to_string(),
"".to_string(),
"This paragraph belongs to the same list item.".to_string(),
"4. ".to_string(),
"Second loose item with a nested list after a blank line.".to_string(),
" - Nested bullet under a loose item".to_string(),
" - Another nested bullet".to_string(),
];
assert_eq!(
streamed, expected,
"expected exact rendered lines for loose/tight section"
);
}
}