Files
llmx/codex-rs/tui/src/markdown.rs
Jeremy Rose 578ff09e17 prefer ratatui Stylized for constructing lines/spans (#3068)
no functional change, just simplifying ratatui styling and adding
guidance in AGENTS.md for future.
2025-09-02 23:19:54 +00:00

582 lines
21 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
use crate::citation_regex::CITATION_REGEX;
use codex_core::config::Config;
use codex_core::config_types::UriBasedFileOpener;
use ratatui::text::Line;
use std::borrow::Cow;
use std::path::Path;
pub(crate) fn append_markdown(
markdown_source: &str,
lines: &mut Vec<Line<'static>>,
config: &Config,
) {
append_markdown_with_opener_and_cwd(markdown_source, lines, config.file_opener, &config.cwd);
}
fn append_markdown_with_opener_and_cwd(
markdown_source: &str,
lines: &mut Vec<Line<'static>>,
file_opener: UriBasedFileOpener,
cwd: &Path,
) {
// Historically, we fed the entire `markdown_source` into the renderer in
// one pass. However, fenced code blocks sometimes lost leading whitespace
// when formatted by the markdown renderer/highlighter. To preserve code
// block content exactly, split the source into "text" and "code" segments:
// - Render non-code text through `tui_markdown` (with citation rewrite).
// - Render code block content verbatim as plain lines without additional
// formatting, preserving leading spaces.
for seg in split_text_and_fences(markdown_source) {
match seg {
Segment::Text(s) => {
let processed = rewrite_file_citations(&s, file_opener, cwd);
let rendered = tui_markdown::from_str(&processed);
crate::render::line_utils::push_owned_lines(&rendered.lines, lines);
}
Segment::Code { content, .. } => {
// Emit the code content exactly as-is, line by line.
// We don't attempt syntax highlighting to avoid whitespace bugs.
for line in content.split_inclusive('\n') {
// split_inclusive keeps the trailing \n; we want lines without it.
let line = if let Some(stripped) = line.strip_suffix('\n') {
stripped
} else {
line
};
let owned_line: Line<'static> = line.to_string().into();
lines.push(owned_line);
}
}
}
}
}
/// Rewrites file citations in `src` into markdown hyperlinks using the
/// provided `scheme` (`vscode`, `cursor`, etc.). The resulting URI follows the
/// format expected by VS Code-compatible file openers:
///
/// ```text
/// <scheme>://file<ABS_PATH>:<LINE>
/// ```
fn rewrite_file_citations<'a>(
src: &'a str,
file_opener: UriBasedFileOpener,
cwd: &Path,
) -> Cow<'a, str> {
// Map enum values to the corresponding URI scheme strings.
let scheme: &str = match file_opener.get_scheme() {
Some(scheme) => scheme,
None => return Cow::Borrowed(src),
};
CITATION_REGEX.replace_all(src, |caps: &regex_lite::Captures<'_>| {
let file = &caps[1];
let start_line = &caps[2];
// Resolve the path against `cwd` when it is relative.
let absolute_path = {
let p = Path::new(file);
let absolute_path = if p.is_absolute() {
path_clean::clean(p)
} else {
path_clean::clean(cwd.join(p))
};
// VS Code expects forward slashes even on Windows because URIs use
// `/` as the path separator.
absolute_path.to_string_lossy().replace('\\', "/")
};
// Render as a normal markdown link so the downstream renderer emits
// the hyperlink escape sequence (when supported by the terminal).
//
// In practice, sometimes multiple citations for the same file, but with a
// different line number, are shown sequentially, so we:
// - include the line number in the label to disambiguate them
// - add a space after the link to make it easier to read
format!("[{file}:{start_line}]({scheme}://file{absolute_path}:{start_line}) ")
})
}
// use shared helper from `line_utils`
// Minimal code block splitting.
// - Recognizes fenced blocks opened by ``` or ~~~ (allowing leading whitespace).
// The opening fence may include a language string which we ignore.
// The closing fence must be on its own line (ignoring surrounding whitespace).
// - Additionally recognizes indented code blocks that begin after a blank line
// with a line starting with at least 4 spaces or a tab, and continue for
// consecutive lines that are blank or also indented by >= 4 spaces or a tab.
enum Segment {
Text(String),
Code {
_lang: Option<String>,
content: String,
},
}
fn split_text_and_fences(src: &str) -> Vec<Segment> {
let mut segments = Vec::new();
let mut curr_text = String::new();
#[derive(Copy, Clone, PartialEq)]
enum CodeMode {
None,
Fenced,
Indented,
}
let mut code_mode = CodeMode::None;
let mut fence_token = "";
let mut code_lang: Option<String> = None;
let mut code_content = String::new();
// We intentionally do not require a preceding blank line for indented code blocks,
// since streamed model output often omits it. This favors preserving indentation.
for line in src.split_inclusive('\n') {
let line_no_nl = line.strip_suffix('\n');
let trimmed_start = match line_no_nl {
Some(l) => l.trim_start(),
None => line.trim_start(),
};
if code_mode == CodeMode::None {
let open = if trimmed_start.starts_with("```") {
Some("```")
} else if trimmed_start.starts_with("~~~") {
Some("~~~")
} else {
None
};
if let Some(tok) = open {
// Flush pending text segment.
if !curr_text.is_empty() {
segments.push(Segment::Text(curr_text.clone()));
curr_text.clear();
}
fence_token = tok;
// Capture language after the token on this line (before newline).
let after = &trimmed_start[tok.len()..];
let lang = after.trim();
code_lang = if lang.is_empty() {
None
} else {
Some(lang.to_string())
};
code_mode = CodeMode::Fenced;
code_content.clear();
// Do not include the opening fence line in output.
continue;
}
// Check for start of an indented code block: only after a blank line
// (or at the beginning), and the line must start with >=4 spaces or a tab.
let raw_line = match line_no_nl {
Some(l) => l,
None => line,
};
let leading_spaces = raw_line.chars().take_while(|c| *c == ' ').count();
let starts_with_tab = raw_line.starts_with('\t');
// Consider any line that begins with >=4 spaces or a tab to start an
// indented code block. This favors preserving indentation even when a
// preceding blank line is omitted (common in streamed model output).
let starts_indented_code = (leading_spaces >= 4) || starts_with_tab;
if starts_indented_code {
// Flush pending text and begin an indented code block.
if !curr_text.is_empty() {
segments.push(Segment::Text(curr_text.clone()));
curr_text.clear();
}
code_mode = CodeMode::Indented;
code_content.clear();
code_content.push_str(line);
// Inside code now; do not treat this line as normal text.
continue;
}
// Normal text line.
curr_text.push_str(line);
} else {
match code_mode {
CodeMode::Fenced => {
// inside fenced code: check for closing fence on its own line
let trimmed = match line_no_nl {
Some(l) => l.trim(),
None => line.trim(),
};
if trimmed == fence_token {
// End code block: emit segment without fences
segments.push(Segment::Code {
_lang: code_lang.take(),
content: code_content.clone(),
});
code_content.clear();
code_mode = CodeMode::None;
fence_token = "";
continue;
}
// Accumulate code content exactly as-is.
code_content.push_str(line);
}
CodeMode::Indented => {
// Continue while the line is blank, or starts with >=4 spaces, or a tab.
let raw_line = match line_no_nl {
Some(l) => l,
None => line,
};
let is_blank = raw_line.trim().is_empty();
let leading_spaces = raw_line.chars().take_while(|c| *c == ' ').count();
let starts_with_tab = raw_line.starts_with('\t');
if is_blank || leading_spaces >= 4 || starts_with_tab {
code_content.push_str(line);
} else {
// Close the indented code block and reprocess this line as normal text.
segments.push(Segment::Code {
_lang: None,
content: code_content.clone(),
});
code_content.clear();
code_mode = CodeMode::None;
// Now handle current line as text.
curr_text.push_str(line);
}
}
CodeMode::None => unreachable!(),
}
}
}
if code_mode != CodeMode::None {
// Unterminated code fence: treat accumulated content as a code segment.
segments.push(Segment::Code {
_lang: code_lang.take(),
content: code_content.clone(),
});
} else if !curr_text.is_empty() {
segments.push(Segment::Text(curr_text.clone()));
}
segments
}
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
#[test]
fn citation_is_rewritten_with_absolute_path() {
let markdown = "See 【F:/src/main.rs†L42-L50】 for details.";
let cwd = Path::new("/workspace");
let result = rewrite_file_citations(markdown, UriBasedFileOpener::VsCode, cwd);
assert_eq!(
"See [/src/main.rs:42](vscode://file/src/main.rs:42) for details.",
result
);
}
#[test]
fn citation_is_rewritten_with_relative_path() {
let markdown = "Refer to 【F:lib/mod.rs†L5】 here.";
let cwd = Path::new("/home/user/project");
let result = rewrite_file_citations(markdown, UriBasedFileOpener::Windsurf, cwd);
assert_eq!(
"Refer to [lib/mod.rs:5](windsurf://file/home/user/project/lib/mod.rs:5) here.",
result
);
}
#[test]
fn citation_followed_by_space_so_they_do_not_run_together() {
let markdown = "References on lines 【F:src/foo.rs†L24】【F:src/foo.rs†L42】";
let cwd = Path::new("/home/user/project");
let result = rewrite_file_citations(markdown, UriBasedFileOpener::VsCode, cwd);
assert_eq!(
"References on lines [src/foo.rs:24](vscode://file/home/user/project/src/foo.rs:24) [src/foo.rs:42](vscode://file/home/user/project/src/foo.rs:42) ",
result
);
}
#[test]
fn citation_unchanged_without_file_opener() {
let markdown = "Look at 【F:file.rs†L1】.";
let cwd = Path::new("/");
let unchanged = rewrite_file_citations(markdown, UriBasedFileOpener::VsCode, cwd);
// The helper itself always rewrites this test validates behaviour of
// append_markdown when `file_opener` is None.
let mut out = Vec::new();
append_markdown_with_opener_and_cwd(markdown, &mut out, UriBasedFileOpener::None, cwd);
// Convert lines back to string for comparison.
let rendered: String = out
.iter()
.flat_map(|l| l.spans.iter())
.map(|s| s.content.clone())
.collect::<Vec<_>>()
.join("");
assert_eq!(markdown, rendered);
// Ensure helper rewrites.
assert_ne!(markdown, unchanged);
}
#[test]
fn fenced_code_blocks_preserve_leading_whitespace() {
let src = "```\n indented\n\t\twith tabs\n four spaces\n```\n";
let cwd = Path::new("/");
let mut out = Vec::new();
append_markdown_with_opener_and_cwd(src, &mut out, UriBasedFileOpener::None, cwd);
let rendered: Vec<String> = out
.iter()
.map(|l| {
l.spans
.iter()
.map(|s| s.content.clone())
.collect::<String>()
})
.collect();
assert_eq!(
rendered,
vec![
" indented".to_string(),
"\t\twith tabs".to_string(),
" four spaces".to_string()
]
);
}
#[test]
fn citations_not_rewritten_inside_code_blocks() {
let src = "Before 【F:/x.rs†L1】\n```\nInside 【F:/x.rs†L2】\n```\nAfter 【F:/x.rs†L3】\n";
let cwd = Path::new("/");
let mut out = Vec::new();
append_markdown_with_opener_and_cwd(src, &mut out, UriBasedFileOpener::VsCode, cwd);
let rendered: Vec<String> = out
.iter()
.map(|l| {
l.spans
.iter()
.map(|s| s.content.clone())
.collect::<String>()
})
.collect();
// Expect first and last lines rewritten, middle line unchanged.
assert!(rendered[0].contains("vscode://file"));
assert_eq!(rendered[1], "Inside 【F:/x.rs†L2】");
assert!(matches!(rendered.last(), Some(s) if s.contains("vscode://file")));
}
#[test]
fn indented_code_blocks_preserve_leading_whitespace() {
let src = "Before\n code 1\n\tcode with tab\n code 2\nAfter\n";
let cwd = Path::new("/");
let mut out = Vec::new();
append_markdown_with_opener_and_cwd(src, &mut out, UriBasedFileOpener::None, cwd);
let rendered: Vec<String> = out
.iter()
.map(|l| {
l.spans
.iter()
.map(|s| s.content.clone())
.collect::<String>()
})
.collect();
assert_eq!(
rendered,
vec![
"Before".to_string(),
" code 1".to_string(),
"\tcode with tab".to_string(),
" code 2".to_string(),
"After".to_string()
]
);
}
#[test]
fn citations_not_rewritten_inside_indented_code_blocks() {
let src = "Start 【F:/x.rs†L1】\n\n Inside 【F:/x.rs†L2】\n\nEnd 【F:/x.rs†L3】\n";
let cwd = Path::new("/");
let mut out = Vec::new();
append_markdown_with_opener_and_cwd(src, &mut out, UriBasedFileOpener::VsCode, cwd);
let rendered: Vec<String> = out
.iter()
.map(|l| {
l.spans
.iter()
.map(|s| s.content.clone())
.collect::<String>()
})
.collect();
// Expect first and last lines rewritten, and the indented code line present
// unchanged (citations inside not rewritten). We do not assert on blank
// separator lines since the markdown renderer may normalize them.
assert!(rendered.iter().any(|s| s.contains("vscode://file")));
assert!(rendered.iter().any(|s| s == " Inside 【F:/x.rs†L2】"));
}
#[test]
fn append_markdown_preserves_full_text_line() {
use codex_core::config_types::UriBasedFileOpener;
use std::path::Path;
let src = "Hi! How can I help with codex-rs today? Want me to explore the repo, run tests, or work on a specific change?\n";
let cwd = Path::new("/");
let mut out = Vec::new();
append_markdown_with_opener_and_cwd(src, &mut out, UriBasedFileOpener::None, cwd);
assert_eq!(
out.len(),
1,
"expected a single rendered line for plain text"
);
let rendered: String = out
.iter()
.flat_map(|l| l.spans.iter())
.map(|s| s.content.clone())
.collect::<Vec<_>>()
.join("");
assert_eq!(
rendered,
"Hi! How can I help with codex-rs today? Want me to explore the repo, run tests, or work on a specific change?"
);
}
#[test]
fn tui_markdown_splits_ordered_marker_and_text() {
// With marker and content on the same line, tui_markdown keeps it as one line
// even in the surrounding section context.
let rendered = tui_markdown::from_str("Loose vs. tight list items:\n1. Tight item\n");
let lines: Vec<String> = rendered
.lines
.iter()
.map(|l| {
l.spans
.iter()
.map(|s| s.content.clone())
.collect::<String>()
})
.collect();
assert!(
lines.iter().any(|w| w == "1. Tight item"),
"expected single line '1. Tight item' in context: {lines:?}"
);
}
#[test]
fn append_markdown_matches_tui_markdown_for_ordered_item() {
use codex_core::config_types::UriBasedFileOpener;
use std::path::Path;
let cwd = Path::new("/");
let mut out = Vec::new();
append_markdown_with_opener_and_cwd(
"1. Tight item\n",
&mut out,
UriBasedFileOpener::None,
cwd,
);
let lines: Vec<String> = out
.iter()
.map(|l| {
l.spans
.iter()
.map(|s| s.content.clone())
.collect::<String>()
})
.collect();
assert_eq!(lines, vec!["1. Tight item".to_string()]);
}
#[test]
fn tui_markdown_shape_for_loose_tight_section() {
// Use the exact source from the session deltas used in tests.
let source = r#"
Loose vs. tight list items:
1. Tight item
2. Another tight item
3.
Loose item
"#;
let rendered = tui_markdown::from_str(source);
let lines: Vec<String> = rendered
.lines
.iter()
.map(|l| {
l.spans
.iter()
.map(|s| s.content.clone())
.collect::<String>()
})
.collect();
// Join into a single string and assert the exact shape we observe
// from tui_markdown in this larger context (marker and content split).
let joined = {
let mut s = String::new();
for (i, l) in lines.iter().enumerate() {
s.push_str(l);
if i + 1 < lines.len() {
s.push('\n');
}
}
s
};
let expected = r#"Loose vs. tight list items:
1.
Tight item
2.
Another tight item
3.
Loose item"#;
assert_eq!(
joined, expected,
"unexpected tui_markdown shape: {joined:?}"
);
}
#[test]
fn split_text_and_fences_keeps_ordered_list_line_as_text() {
// No fences here; expect a single Text segment containing the full input.
let src = "Loose vs. tight list items:\n1. Tight item\n";
let segs = super::split_text_and_fences(src);
assert_eq!(
segs.len(),
1,
"expected single text segment, got {}",
segs.len()
);
match &segs[0] {
super::Segment::Text(s) => assert_eq!(s, src),
_ => panic!("expected Text segment for non-fence input"),
}
}
#[test]
fn append_markdown_keeps_ordered_list_line_unsplit_in_context() {
use codex_core::config_types::UriBasedFileOpener;
use std::path::Path;
let src = "Loose vs. tight list items:\n1. Tight item\n";
let cwd = Path::new("/");
let mut out = Vec::new();
append_markdown_with_opener_and_cwd(src, &mut out, UriBasedFileOpener::None, cwd);
let lines: Vec<String> = out
.iter()
.map(|l| {
l.spans
.iter()
.map(|s| s.content.clone())
.collect::<String>()
})
.collect();
// Expect to find the ordered list line rendered as a single line,
// not split into a marker-only line followed by the text.
assert!(
lines.iter().any(|s| s == "1. Tight item"),
"expected '1. Tight item' rendered as a single line; got: {lines:?}"
);
assert!(
!lines
.windows(2)
.any(|w| w[0].trim_end() == "1." && w[1] == "Tight item"),
"did not expect a split into ['1.', 'Tight item']; got: {lines:?}"
);
}
}