replace tui_markdown with a custom markdown renderer (#3396)
Also, simplify the streaming behavior. This fixes a number of display issues with streaming markdown, and paves the way for better markdown features (e.g. customizable styles, syntax highlighting, markdown-aware wrapping). The following are not currently supported: footnotes, tables, and reference-style links.
This commit is contained in:
@@ -1,8 +1,6 @@
|
||||
use crate::citation_regex::CITATION_REGEX;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::config_types::UriBasedFileOpener;
|
||||
use ratatui::text::Line;
|
||||
use std::borrow::Cow;
|
||||
use std::path::Path;
|
||||
|
||||
pub(crate) fn append_markdown(
|
||||
@@ -19,238 +17,13 @@ fn append_markdown_with_opener_and_cwd(
|
||||
file_opener: UriBasedFileOpener,
|
||||
cwd: &Path,
|
||||
) {
|
||||
// Historically, we fed the entire `markdown_source` into the renderer in
|
||||
// one pass. However, fenced code blocks sometimes lost leading whitespace
|
||||
// when formatted by the markdown renderer/highlighter. To preserve code
|
||||
// block content exactly, split the source into "text" and "code" segments:
|
||||
// - Render non-code text through `tui_markdown` (with citation rewrite).
|
||||
// - Render code block content verbatim as plain lines without additional
|
||||
// formatting, preserving leading spaces.
|
||||
for seg in split_text_and_fences(markdown_source) {
|
||||
match seg {
|
||||
Segment::Text(s) => {
|
||||
let processed = rewrite_file_citations(&s, file_opener, cwd);
|
||||
let rendered = tui_markdown::from_str(&processed);
|
||||
crate::render::line_utils::push_owned_lines(&rendered.lines, lines);
|
||||
}
|
||||
Segment::Code { content, .. } => {
|
||||
// Emit the code content exactly as-is, line by line.
|
||||
// We don't attempt syntax highlighting to avoid whitespace bugs.
|
||||
for line in content.split_inclusive('\n') {
|
||||
// split_inclusive keeps the trailing \n; we want lines without it.
|
||||
let line = if let Some(stripped) = line.strip_suffix('\n') {
|
||||
stripped
|
||||
} else {
|
||||
line
|
||||
};
|
||||
let owned_line: Line<'static> = line.to_string().into();
|
||||
lines.push(owned_line);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Rewrites file citations in `src` into markdown hyperlinks using the
|
||||
/// provided `scheme` (`vscode`, `cursor`, etc.). The resulting URI follows the
|
||||
/// format expected by VS Code-compatible file openers:
|
||||
///
|
||||
/// ```text
|
||||
/// <scheme>://file<ABS_PATH>:<LINE>
|
||||
/// ```
|
||||
fn rewrite_file_citations<'a>(
|
||||
src: &'a str,
|
||||
file_opener: UriBasedFileOpener,
|
||||
cwd: &Path,
|
||||
) -> Cow<'a, str> {
|
||||
// Map enum values to the corresponding URI scheme strings.
|
||||
let scheme: &str = match file_opener.get_scheme() {
|
||||
Some(scheme) => scheme,
|
||||
None => return Cow::Borrowed(src),
|
||||
};
|
||||
|
||||
CITATION_REGEX.replace_all(src, |caps: ®ex_lite::Captures<'_>| {
|
||||
let file = &caps[1];
|
||||
let start_line = &caps[2];
|
||||
|
||||
// Resolve the path against `cwd` when it is relative.
|
||||
let absolute_path = {
|
||||
let p = Path::new(file);
|
||||
let absolute_path = if p.is_absolute() {
|
||||
path_clean::clean(p)
|
||||
} else {
|
||||
path_clean::clean(cwd.join(p))
|
||||
};
|
||||
// VS Code expects forward slashes even on Windows because URIs use
|
||||
// `/` as the path separator.
|
||||
absolute_path.to_string_lossy().replace('\\', "/")
|
||||
};
|
||||
|
||||
// Render as a normal markdown link so the downstream renderer emits
|
||||
// the hyperlink escape sequence (when supported by the terminal).
|
||||
//
|
||||
// In practice, sometimes multiple citations for the same file, but with a
|
||||
// different line number, are shown sequentially, so we:
|
||||
// - include the line number in the label to disambiguate them
|
||||
// - add a space after the link to make it easier to read
|
||||
format!("[{file}:{start_line}]({scheme}://file{absolute_path}:{start_line}) ")
|
||||
})
|
||||
}
|
||||
|
||||
// use shared helper from `line_utils`
|
||||
|
||||
// Minimal code block splitting.
// - Recognizes fenced blocks opened by a run of three or more backticks or
//   tildes (allowing leading whitespace). The opening fence may include a
//   language string, which is captured but otherwise ignored.
//   The closing fence must be on its own line (ignoring surrounding
//   whitespace), use the same marker character, and be at least as long as
//   the opening fence.
// - Additionally recognizes indented code blocks: any line starting with at
//   least 4 spaces or a tab opens one (no preceding blank line is required,
//   since streamed model output often omits it), and the block continues for
//   consecutive lines that are blank or also indented by >= 4 spaces or a tab.

/// One contiguous piece of a markdown source: either prose or the verbatim
/// content of a code block (fence lines excluded).
#[derive(Debug, Clone, PartialEq, Eq)]
enum Segment {
    Text(String),
    Code {
        _lang: Option<String>,
        content: String,
    },
}

/// Returns the fence marker and run length when `trimmed` (a line with its
/// leading whitespace removed) starts with three or more backticks or tildes.
fn fence_opening(trimmed: &str) -> Option<(char, usize)> {
    let marker = trimmed.chars().next().filter(|c| *c == '`' || *c == '~')?;
    let len = trimmed.chars().take_while(|&c| c == marker).count();
    if len >= 3 { Some((marker, len)) } else { None }
}

/// Returns true when `line` starts with at least four spaces or a tab.
fn is_indented_code_line(line: &str) -> bool {
    line.starts_with("    ") || line.starts_with('\t')
}

/// Splits `src` into alternating text and code segments, in source order.
///
/// Text segments keep their lines (including newlines) exactly as given.
/// Code segments hold the block content exactly as given; for fenced blocks
/// the opening and closing fence lines are dropped. A code block that is
/// still open at the end of `src` is emitted as a code segment.
fn split_text_and_fences(src: &str) -> Vec<Segment> {
    #[derive(Clone, Copy)]
    enum Mode {
        Text,
        Fenced { marker: char, len: usize },
        Indented,
    }

    let mut segments = Vec::new();
    let mut text = String::new();
    let mut code = String::new();
    let mut lang: Option<String> = None;
    let mut mode = Mode::Text;

    for line in src.split_inclusive('\n') {
        let body = line.strip_suffix('\n').unwrap_or(line);

        if let Mode::Fenced { marker, len } = mode {
            // A closing fence is a line made up solely of the opening marker,
            // at least as long as the opening run. Requiring an exact
            // three-character match would leave longer fences open forever.
            let candidate = body.trim();
            if candidate.len() >= len && candidate.chars().all(|c| c == marker) {
                segments.push(Segment::Code {
                    _lang: lang.take(),
                    content: std::mem::take(&mut code),
                });
                mode = Mode::Text;
            } else {
                // Accumulate code content exactly as-is.
                code.push_str(line);
            }
            continue;
        }

        if let Mode::Indented = mode {
            if body.trim().is_empty() || is_indented_code_line(body) {
                code.push_str(line);
                continue;
            }
            // Close the indented block, then fall through so this line gets
            // the normal text-mode handling; it may itself open a fence.
            segments.push(Segment::Code {
                _lang: None,
                content: std::mem::take(&mut code),
            });
            mode = Mode::Text;
        }

        // Text mode. Fences are checked before indentation so that an
        // indented fence line still opens a fenced block.
        let trimmed = body.trim_start();
        if let Some((marker, len)) = fence_opening(trimmed) {
            if !text.is_empty() {
                segments.push(Segment::Text(std::mem::take(&mut text)));
            }
            // `marker` is ASCII, so the run length is also a byte offset.
            let info = trimmed[len..].trim();
            lang = if info.is_empty() {
                None
            } else {
                Some(info.to_string())
            };
            code.clear();
            // The opening fence line itself is not part of the output.
            mode = Mode::Fenced { marker, len };
        } else if is_indented_code_line(body) {
            if !text.is_empty() {
                segments.push(Segment::Text(std::mem::take(&mut text)));
            }
            code.clear();
            code.push_str(line);
            mode = Mode::Indented;
        } else {
            text.push_str(line);
        }
    }

    match mode {
        Mode::Text => {
            if !text.is_empty() {
                segments.push(Segment::Text(text));
            }
        }
        // Unterminated code block: treat accumulated content as a code segment.
        Mode::Fenced { .. } | Mode::Indented => {
            segments.push(Segment::Code {
                _lang: lang,
                content: code,
            });
        }
    }

    segments
}
|
||||
// Render via pulldown-cmark and rewrite citations during traversal (outside code blocks).
|
||||
let rendered = crate::markdown_render::render_markdown_text_with_citations(
|
||||
markdown_source,
|
||||
file_opener.get_scheme(),
|
||||
cwd,
|
||||
);
|
||||
crate::render::line_utils::push_owned_lines(&rendered.lines, lines);
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -258,88 +31,6 @@ mod tests {
|
||||
use super::*;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
#[test]
fn citation_is_rewritten_with_absolute_path() {
    let markdown = "See 【F:/src/main.rs†L42-L50】 for details.";
    let cwd = Path::new("/workspace");
    let result = rewrite_file_citations(markdown, UriBasedFileOpener::VsCode, cwd);

    // The helper appends one space after every rewritten link and the input
    // keeps its own space before "for", so two spaces follow the link.
    assert_eq!(
        "See [/src/main.rs:42](vscode://file/src/main.rs:42)  for details.",
        result
    );
}
|
||||
|
||||
#[test]
fn citation_is_rewritten_with_relative_path() {
    let markdown = "Refer to 【F:lib/mod.rs†L5】 here.";
    let cwd = Path::new("/home/user/project");
    let result = rewrite_file_citations(markdown, UriBasedFileOpener::Windsurf, cwd);

    // The relative path is resolved against `cwd`. The helper appends one
    // space after the link and the input keeps its own space before "here",
    // so two spaces follow the link.
    assert_eq!(
        "Refer to [lib/mod.rs:5](windsurf://file/home/user/project/lib/mod.rs:5)  here.",
        result
    );
}
|
||||
|
||||
#[test]
fn citation_followed_by_space_so_they_do_not_run_together() {
    let project_root = Path::new("/home/user/project");
    let input = "References on lines 【F:src/foo.rs†L24】【F:src/foo.rs†L42】";

    let rewritten = rewrite_file_citations(input, UriBasedFileOpener::VsCode, project_root);

    // Back-to-back citations must come out as two links separated by a space.
    let expected = "References on lines [src/foo.rs:24](vscode://file/home/user/project/src/foo.rs:24) [src/foo.rs:42](vscode://file/home/user/project/src/foo.rs:42) ";
    assert_eq!(expected, rewritten);
}
|
||||
|
||||
#[test]
fn citation_unchanged_without_file_opener() {
    let markdown = "Look at 【F:file.rs†L1】.";
    let cwd = Path::new("/");

    // With an opener configured the helper rewrites the citation; this value
    // is the control for the final assertion.
    let with_opener = rewrite_file_citations(markdown, UriBasedFileOpener::VsCode, cwd);

    // The behaviour under test: rendering with `UriBasedFileOpener::None`.
    let mut out = Vec::new();
    append_markdown_with_opener_and_cwd(markdown, &mut out, UriBasedFileOpener::None, cwd);

    // Flatten every span of every line back into one string for comparison.
    let mut rendered = String::new();
    for line in &out {
        for span in &line.spans {
            rendered.push_str(&span.content);
        }
    }

    assert_eq!(markdown, rendered);
    // Ensure helper rewrites.
    assert_ne!(markdown, with_opener);
}
|
||||
|
||||
#[test]
fn fenced_code_blocks_preserve_leading_whitespace() {
    let cwd = Path::new("/");
    let src = "```\n indented\n\t\twith tabs\n four spaces\n```\n";

    let mut out = Vec::new();
    append_markdown_with_opener_and_cwd(src, &mut out, UriBasedFileOpener::None, cwd);

    // Concatenate the spans of each rendered line into one string per line.
    let mut rendered: Vec<String> = Vec::with_capacity(out.len());
    for line in &out {
        let mut text = String::new();
        for span in &line.spans {
            text.push_str(&span.content);
        }
        rendered.push(text);
    }

    // Only the code lines are expected, with leading whitespace untouched.
    let expected: Vec<String> = [" indented", "\t\twith tabs", " four spaces"]
        .iter()
        .map(|s| s.to_string())
        .collect();
    assert_eq!(rendered, expected);
}
|
||||
|
||||
#[test]
|
||||
fn citations_not_rewritten_inside_code_blocks() {
|
||||
let src = "Before 【F:/x.rs†L1】\n```\nInside 【F:/x.rs†L2】\n```\nAfter 【F:/x.rs†L3】\n";
|
||||
@@ -355,19 +46,31 @@ mod tests {
|
||||
.collect::<String>()
|
||||
})
|
||||
.collect();
|
||||
// Expect first and last lines rewritten, middle line unchanged.
|
||||
assert!(rendered[0].contains("vscode://file"));
|
||||
assert_eq!(rendered[1], "Inside 【F:/x.rs†L2】");
|
||||
assert!(matches!(rendered.last(), Some(s) if s.contains("vscode://file")));
|
||||
// Expect a line containing the inside text unchanged.
|
||||
assert!(rendered.iter().any(|s| s.contains("Inside 【F:/x.rs†L2】")));
|
||||
// And first/last sections rewritten.
|
||||
assert!(
|
||||
rendered
|
||||
.first()
|
||||
.map(|s| s.contains("vscode://file"))
|
||||
.unwrap_or(false)
|
||||
);
|
||||
assert!(
|
||||
rendered
|
||||
.last()
|
||||
.map(|s| s.contains("vscode://file"))
|
||||
.unwrap_or(false)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn indented_code_blocks_preserve_leading_whitespace() {
|
||||
let src = "Before\n code 1\n\tcode with tab\n code 2\nAfter\n";
|
||||
// Basic sanity: indented code with surrounding blank lines should produce the indented line.
|
||||
let src = "Before\n\n code 1\n\nAfter\n";
|
||||
let cwd = Path::new("/");
|
||||
let mut out = Vec::new();
|
||||
append_markdown_with_opener_and_cwd(src, &mut out, UriBasedFileOpener::None, cwd);
|
||||
let rendered: Vec<String> = out
|
||||
let lines: Vec<String> = out
|
||||
.iter()
|
||||
.map(|l| {
|
||||
l.spans
|
||||
@@ -376,16 +79,7 @@ mod tests {
|
||||
.collect::<String>()
|
||||
})
|
||||
.collect();
|
||||
assert_eq!(
|
||||
rendered,
|
||||
vec![
|
||||
"Before".to_string(),
|
||||
" code 1".to_string(),
|
||||
"\tcode with tab".to_string(),
|
||||
" code 2".to_string(),
|
||||
"After".to_string()
|
||||
]
|
||||
);
|
||||
assert_eq!(lines, vec!["Before", "", " code 1", "", "After"]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -403,11 +97,17 @@ mod tests {
|
||||
.collect::<String>()
|
||||
})
|
||||
.collect();
|
||||
// Expect first and last lines rewritten, and the indented code line present
|
||||
// unchanged (citations inside not rewritten). We do not assert on blank
|
||||
// separator lines since the markdown renderer may normalize them.
|
||||
assert!(rendered.iter().any(|s| s.contains("vscode://file")));
|
||||
assert!(rendered.iter().any(|s| s == " Inside 【F:/x.rs†L2】"));
|
||||
assert!(
|
||||
rendered
|
||||
.iter()
|
||||
.any(|s| s.contains("Start") && s.contains("vscode://file"))
|
||||
);
|
||||
assert!(
|
||||
rendered
|
||||
.iter()
|
||||
.any(|s| s.contains("End") && s.contains("vscode://file"))
|
||||
);
|
||||
assert!(rendered.iter().any(|s| s.contains("Inside 【F:/x.rs†L2】")));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -435,27 +135,6 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn tui_markdown_splits_ordered_marker_and_text() {
    // When the marker and its content share a source line, the assertion
    // below expects tui_markdown to emit them as one rendered line, even with
    // a preceding section line.
    let rendered = tui_markdown::from_str("Loose vs. tight list items:\n1. Tight item\n");

    // Concatenate the spans of each rendered line into one string per line.
    let mut lines: Vec<String> = Vec::with_capacity(rendered.lines.len());
    for line in &rendered.lines {
        let mut text = String::new();
        for span in &line.spans {
            text.push_str(&span.content);
        }
        lines.push(text);
    }

    let has_single_line_item = lines.iter().any(|w| w == "1. Tight item");
    assert!(
        has_single_line_item,
        "expected single line '1. Tight item' in context: {lines:?}"
    );
}
|
||||
|
||||
#[test]
|
||||
fn append_markdown_matches_tui_markdown_for_ordered_item() {
|
||||
use codex_core::config_types::UriBasedFileOpener;
|
||||
@@ -480,72 +159,6 @@ mod tests {
|
||||
assert_eq!(lines, vec!["1. Tight item".to_string()]);
|
||||
}
|
||||
|
||||
#[test]
fn tui_markdown_shape_for_loose_tight_section() {
    // Use the exact source from the session deltas used in tests.
    let source = r#"
Loose vs. tight list items:
1. Tight item
2. Another tight item

3.
Loose item
"#;

    let rendered = tui_markdown::from_str(source);

    // Concatenate the spans of each rendered line into one string per line.
    let mut lines: Vec<String> = Vec::with_capacity(rendered.lines.len());
    for line in &rendered.lines {
        let mut text = String::new();
        for span in &line.spans {
            text.push_str(&span.content);
        }
        lines.push(text);
    }

    // Join into a single string (newline between lines, none at the end) and
    // assert the exact shape we observe from tui_markdown in this larger
    // context (marker and content split).
    let joined = lines.join("\n");
    let expected = r#"Loose vs. tight list items:

1.
Tight item
2.
Another tight item
3.
Loose item"#;
    assert_eq!(
        joined, expected,
        "unexpected tui_markdown shape: {joined:?}"
    );
}
|
||||
|
||||
#[test]
fn split_text_and_fences_keeps_ordered_list_line_as_text() {
    // The input has no fences and no indented lines, so the splitter should
    // hand back the whole input as one Text segment.
    let src = "Loose vs. tight list items:\n1. Tight item\n";
    let segs = super::split_text_and_fences(src);

    let count = segs.len();
    assert_eq!(count, 1, "expected single text segment, got {}", segs.len());

    if let super::Segment::Text(text) = &segs[0] {
        assert_eq!(text, src);
    } else {
        panic!("expected Text segment for non-fence input");
    }
}
|
||||
|
||||
#[test]
|
||||
fn append_markdown_keeps_ordered_list_line_unsplit_in_context() {
|
||||
use codex_core::config_types::UriBasedFileOpener;
|
||||
|
||||
Reference in New Issue
Block a user