Files
llmx/codex-rs/tui/src/markdown.rs

582 lines
21 KiB
Rust
Raw Normal View History

use crate::citation_regex::CITATION_REGEX;
use codex_core::config::Config;
use codex_core::config_types::UriBasedFileOpener;
feat: introduce the use of tui-markdown (#851) This introduces the use of the `tui-markdown` crate to parse an assistant message as Markdown and style it using ANSI for a better user experience. As shown in the screenshot below, it has support for syntax highlighting for _tagged_ fenced code blocks: <img width="907" alt="image" src="https://github.com/user-attachments/assets/900dc229-80bb-46e8-b1bb-efee4c70ba3c" /> That said, `tui-markdown` is not as configurable (or stylish!) as https://www.npmjs.com/package/marked-terminal, which is what we use in the TypeScript CLI. In particular: * The styles are hardcoded and `tui_markdown::from_str()` does not take any options whatsoever. It uses "bold white" for inline code style which does not stand out as much as the yellow used by `marked-terminal`: https://github.com/joshka/tui-markdown/blob/65402cbda70325f34e7ddf6fe1ec629bcd9459cf/tui-markdown/src/lib.rs#L464 I asked Codex to take a first pass at this and it came up with: https://github.com/joshka/tui-markdown/pull/80 * If a fenced code block is not tagged, then it does not get highlighted. I would rather add some logic here: https://github.com/joshka/tui-markdown/blob/65402cbda70325f34e7ddf6fe1ec629bcd9459cf/tui-markdown/src/lib.rs#L262 that uses something like https://pypi.org/project/guesslang/ to examine the value of `text` and try to use the appropriate syntax highlighter. * When we have a fenced code block, we do not want to show the opening and closing triple backticks in the output. To unblock ourselves, we might want to bundle our own fork of `tui-markdown` temporarily until we figure out what the shape of the API should be and then try to upstream it.
2025-05-07 10:46:32 -07:00
use ratatui::text::Line;
use std::borrow::Cow;
use std::path::Path;
feat: introduce the use of tui-markdown (#851) This introduces the use of the `tui-markdown` crate to parse an assistant message as Markdown and style it using ANSI for a better user experience. As shown in the screenshot below, it has support for syntax highlighting for _tagged_ fenced code blocks: <img width="907" alt="image" src="https://github.com/user-attachments/assets/900dc229-80bb-46e8-b1bb-efee4c70ba3c" /> That said, `tui-markdown` is not as configurable (or stylish!) as https://www.npmjs.com/package/marked-terminal, which is what we use in the TypeScript CLI. In particular: * The styles are hardcoded and `tui_markdown::from_str()` does not take any options whatsoever. It uses "bold white" for inline code style which does not stand out as much as the yellow used by `marked-terminal`: https://github.com/joshka/tui-markdown/blob/65402cbda70325f34e7ddf6fe1ec629bcd9459cf/tui-markdown/src/lib.rs#L464 I asked Codex to take a first pass at this and it came up with: https://github.com/joshka/tui-markdown/pull/80 * If a fenced code block is not tagged, then it does not get highlighted. I would rather add some logic here: https://github.com/joshka/tui-markdown/blob/65402cbda70325f34e7ddf6fe1ec629bcd9459cf/tui-markdown/src/lib.rs#L262 that uses something like https://pypi.org/project/guesslang/ to examine the value of `text` and try to use the appropriate syntax highlighter. * When we have a fenced code block, we do not want to show the opening and closing triple backticks in the output. To unblock ourselves, we might want to bundle our own fork of `tui-markdown` temporarily until we figure out what the shape of the API should be and then try to upstream it.
2025-05-07 10:46:32 -07:00
pub(crate) fn append_markdown(
markdown_source: &str,
lines: &mut Vec<Line<'static>>,
config: &Config,
) {
append_markdown_with_opener_and_cwd(markdown_source, lines, config.file_opener, &config.cwd);
}
fn append_markdown_with_opener_and_cwd(
markdown_source: &str,
lines: &mut Vec<Line<'static>>,
file_opener: UriBasedFileOpener,
cwd: &Path,
) {
// Historically, we fed the entire `markdown_source` into the renderer in
// one pass. However, fenced code blocks sometimes lost leading whitespace
// when formatted by the markdown renderer/highlighter. To preserve code
// block content exactly, split the source into "text" and "code" segments:
// - Render non-code text through `tui_markdown` (with citation rewrite).
// - Render code block content verbatim as plain lines without additional
// formatting, preserving leading spaces.
for seg in split_text_and_fences(markdown_source) {
match seg {
Segment::Text(s) => {
let processed = rewrite_file_citations(&s, file_opener, cwd);
let rendered = tui_markdown::from_str(&processed);
crate::render::line_utils::push_owned_lines(&rendered.lines, lines);
}
Segment::Code { content, .. } => {
// Emit the code content exactly as-is, line by line.
// We don't attempt syntax highlighting to avoid whitespace bugs.
for line in content.split_inclusive('\n') {
// split_inclusive keeps the trailing \n; we want lines without it.
let line = if let Some(stripped) = line.strip_suffix('\n') {
stripped
} else {
line
};
let owned_line: Line<'static> = line.to_string().into();
lines.push(owned_line);
}
}
feat: introduce the use of tui-markdown (#851) This introduces the use of the `tui-markdown` crate to parse an assistant message as Markdown and style it using ANSI for a better user experience. As shown in the screenshot below, it has support for syntax highlighting for _tagged_ fenced code blocks: <img width="907" alt="image" src="https://github.com/user-attachments/assets/900dc229-80bb-46e8-b1bb-efee4c70ba3c" /> That said, `tui-markdown` is not as configurable (or stylish!) as https://www.npmjs.com/package/marked-terminal, which is what we use in the TypeScript CLI. In particular: * The styles are hardcoded and `tui_markdown::from_str()` does not take any options whatsoever. It uses "bold white" for inline code style which does not stand out as much as the yellow used by `marked-terminal`: https://github.com/joshka/tui-markdown/blob/65402cbda70325f34e7ddf6fe1ec629bcd9459cf/tui-markdown/src/lib.rs#L464 I asked Codex to take a first pass at this and it came up with: https://github.com/joshka/tui-markdown/pull/80 * If a fenced code block is not tagged, then it does not get highlighted. I would rather add some logic here: https://github.com/joshka/tui-markdown/blob/65402cbda70325f34e7ddf6fe1ec629bcd9459cf/tui-markdown/src/lib.rs#L262 that uses something like https://pypi.org/project/guesslang/ to examine the value of `text` and try to use the appropriate syntax highlighter. * When we have a fenced code block, we do not want to show the opening and closing triple backticks in the output. To unblock ourselves, we might want to bundle our own fork of `tui-markdown` temporarily until we figure out what the shape of the API should be and then try to upstream it.
2025-05-07 10:46:32 -07:00
}
}
}
/// Rewrites file citations in `src` into markdown hyperlinks using the
/// provided `scheme` (`vscode`, `cursor`, etc.). The resulting URI follows the
/// format expected by VS Code-compatible file openers:
///
/// ```text
/// <scheme>://file<ABS_PATH>:<LINE>
/// ```
fn rewrite_file_citations<'a>(
src: &'a str,
file_opener: UriBasedFileOpener,
cwd: &Path,
) -> Cow<'a, str> {
// Map enum values to the corresponding URI scheme strings.
let scheme: &str = match file_opener.get_scheme() {
Some(scheme) => scheme,
None => return Cow::Borrowed(src),
};
CITATION_REGEX.replace_all(src, |caps: &regex_lite::Captures<'_>| {
let file = &caps[1];
let start_line = &caps[2];
// Resolve the path against `cwd` when it is relative.
let absolute_path = {
let p = Path::new(file);
let absolute_path = if p.is_absolute() {
path_clean::clean(p)
} else {
path_clean::clean(cwd.join(p))
};
// VS Code expects forward slashes even on Windows because URIs use
// `/` as the path separator.
absolute_path.to_string_lossy().replace('\\', "/")
};
// Render as a normal markdown link so the downstream renderer emits
// the hyperlink escape sequence (when supported by the terminal).
//
// In practice, sometimes multiple citations for the same file, but with a
// different line number, are shown sequentially, so we:
// - include the line number in the label to disambiguate them
// - add a space after the link to make it easier to read
format!("[{file}:{start_line}]({scheme}://file{absolute_path}:{start_line}) ")
})
}
// use shared helper from `line_utils`
// Minimal code block splitting.
// - Recognizes fenced blocks opened by ``` or ~~~ (allowing leading whitespace).
// The opening fence may include a language string which we ignore.
// The closing fence must be on its own line (ignoring surrounding whitespace).
// - Additionally recognizes indented code blocks that begin after a blank line
// with a line starting with at least 4 spaces or a tab, and continue for
// consecutive lines that are blank or also indented by >= 4 spaces or a tab.
enum Segment {
Text(String),
Code {
_lang: Option<String>,
content: String,
},
}
fn split_text_and_fences(src: &str) -> Vec<Segment> {
let mut segments = Vec::new();
let mut curr_text = String::new();
#[derive(Copy, Clone, PartialEq)]
enum CodeMode {
None,
Fenced,
Indented,
}
let mut code_mode = CodeMode::None;
let mut fence_token = "";
let mut code_lang: Option<String> = None;
let mut code_content = String::new();
// We intentionally do not require a preceding blank line for indented code blocks,
// since streamed model output often omits it. This favors preserving indentation.
for line in src.split_inclusive('\n') {
let line_no_nl = line.strip_suffix('\n');
let trimmed_start = match line_no_nl {
Some(l) => l.trim_start(),
None => line.trim_start(),
};
if code_mode == CodeMode::None {
let open = if trimmed_start.starts_with("```") {
Some("```")
} else if trimmed_start.starts_with("~~~") {
Some("~~~")
} else {
None
};
if let Some(tok) = open {
// Flush pending text segment.
if !curr_text.is_empty() {
segments.push(Segment::Text(curr_text.clone()));
curr_text.clear();
}
fence_token = tok;
// Capture language after the token on this line (before newline).
let after = &trimmed_start[tok.len()..];
let lang = after.trim();
code_lang = if lang.is_empty() {
None
} else {
Some(lang.to_string())
};
code_mode = CodeMode::Fenced;
code_content.clear();
// Do not include the opening fence line in output.
continue;
}
// Check for start of an indented code block: only after a blank line
// (or at the beginning), and the line must start with >=4 spaces or a tab.
let raw_line = match line_no_nl {
Some(l) => l,
None => line,
};
let leading_spaces = raw_line.chars().take_while(|c| *c == ' ').count();
let starts_with_tab = raw_line.starts_with('\t');
// Consider any line that begins with >=4 spaces or a tab to start an
// indented code block. This favors preserving indentation even when a
// preceding blank line is omitted (common in streamed model output).
let starts_indented_code = (leading_spaces >= 4) || starts_with_tab;
if starts_indented_code {
// Flush pending text and begin an indented code block.
if !curr_text.is_empty() {
segments.push(Segment::Text(curr_text.clone()));
curr_text.clear();
}
code_mode = CodeMode::Indented;
code_content.clear();
code_content.push_str(line);
// Inside code now; do not treat this line as normal text.
continue;
}
// Normal text line.
curr_text.push_str(line);
} else {
match code_mode {
CodeMode::Fenced => {
// inside fenced code: check for closing fence on its own line
let trimmed = match line_no_nl {
Some(l) => l.trim(),
None => line.trim(),
};
if trimmed == fence_token {
// End code block: emit segment without fences
segments.push(Segment::Code {
_lang: code_lang.take(),
content: code_content.clone(),
});
code_content.clear();
code_mode = CodeMode::None;
fence_token = "";
continue;
}
// Accumulate code content exactly as-is.
code_content.push_str(line);
}
CodeMode::Indented => {
// Continue while the line is blank, or starts with >=4 spaces, or a tab.
let raw_line = match line_no_nl {
Some(l) => l,
None => line,
};
let is_blank = raw_line.trim().is_empty();
let leading_spaces = raw_line.chars().take_while(|c| *c == ' ').count();
let starts_with_tab = raw_line.starts_with('\t');
if is_blank || leading_spaces >= 4 || starts_with_tab {
code_content.push_str(line);
} else {
// Close the indented code block and reprocess this line as normal text.
segments.push(Segment::Code {
_lang: None,
content: code_content.clone(),
});
code_content.clear();
code_mode = CodeMode::None;
// Now handle current line as text.
curr_text.push_str(line);
}
}
CodeMode::None => unreachable!(),
}
}
}
if code_mode != CodeMode::None {
// Unterminated code fence: treat accumulated content as a code segment.
segments.push(Segment::Code {
_lang: code_lang.take(),
content: code_content.clone(),
});
} else if !curr_text.is_empty() {
segments.push(Segment::Text(curr_text.clone()));
}
segments
}
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
#[test]
fn citation_is_rewritten_with_absolute_path() {
let markdown = "See 【F:/src/main.rs†L42-L50】 for details.";
let cwd = Path::new("/workspace");
let result = rewrite_file_citations(markdown, UriBasedFileOpener::VsCode, cwd);
assert_eq!(
"See [/src/main.rs:42](vscode://file/src/main.rs:42) for details.",
result
);
}
#[test]
fn citation_is_rewritten_with_relative_path() {
let markdown = "Refer to 【F:lib/mod.rs†L5】 here.";
let cwd = Path::new("/home/user/project");
let result = rewrite_file_citations(markdown, UriBasedFileOpener::Windsurf, cwd);
assert_eq!(
"Refer to [lib/mod.rs:5](windsurf://file/home/user/project/lib/mod.rs:5) here.",
result
);
}
#[test]
fn citation_followed_by_space_so_they_do_not_run_together() {
let markdown = "References on lines 【F:src/foo.rs†L24】【F:src/foo.rs†L42】";
let cwd = Path::new("/home/user/project");
let result = rewrite_file_citations(markdown, UriBasedFileOpener::VsCode, cwd);
assert_eq!(
"References on lines [src/foo.rs:24](vscode://file/home/user/project/src/foo.rs:24) [src/foo.rs:42](vscode://file/home/user/project/src/foo.rs:42) ",
result
);
}
#[test]
fn citation_unchanged_without_file_opener() {
let markdown = "Look at 【F:file.rs†L1】.";
let cwd = Path::new("/");
let unchanged = rewrite_file_citations(markdown, UriBasedFileOpener::VsCode, cwd);
// The helper itself always rewrites this test validates behaviour of
// append_markdown when `file_opener` is None.
let mut out = Vec::new();
append_markdown_with_opener_and_cwd(markdown, &mut out, UriBasedFileOpener::None, cwd);
// Convert lines back to string for comparison.
let rendered: String = out
.iter()
.flat_map(|l| l.spans.iter())
.map(|s| s.content.clone())
.collect::<Vec<_>>()
.join("");
assert_eq!(markdown, rendered);
// Ensure helper rewrites.
assert_ne!(markdown, unchanged);
}
#[test]
fn fenced_code_blocks_preserve_leading_whitespace() {
let src = "```\n indented\n\t\twith tabs\n four spaces\n```\n";
let cwd = Path::new("/");
let mut out = Vec::new();
append_markdown_with_opener_and_cwd(src, &mut out, UriBasedFileOpener::None, cwd);
let rendered: Vec<String> = out
.iter()
.map(|l| {
l.spans
.iter()
.map(|s| s.content.clone())
.collect::<String>()
})
.collect();
assert_eq!(
rendered,
vec![
" indented".to_string(),
"\t\twith tabs".to_string(),
" four spaces".to_string()
]
);
}
#[test]
fn citations_not_rewritten_inside_code_blocks() {
let src = "Before 【F:/x.rs†L1】\n```\nInside 【F:/x.rs†L2】\n```\nAfter 【F:/x.rs†L3】\n";
let cwd = Path::new("/");
let mut out = Vec::new();
append_markdown_with_opener_and_cwd(src, &mut out, UriBasedFileOpener::VsCode, cwd);
let rendered: Vec<String> = out
.iter()
.map(|l| {
l.spans
.iter()
.map(|s| s.content.clone())
.collect::<String>()
})
.collect();
// Expect first and last lines rewritten, middle line unchanged.
assert!(rendered[0].contains("vscode://file"));
assert_eq!(rendered[1], "Inside 【F:/x.rs†L2】");
assert!(matches!(rendered.last(), Some(s) if s.contains("vscode://file")));
}
#[test]
fn indented_code_blocks_preserve_leading_whitespace() {
let src = "Before\n code 1\n\tcode with tab\n code 2\nAfter\n";
let cwd = Path::new("/");
let mut out = Vec::new();
append_markdown_with_opener_and_cwd(src, &mut out, UriBasedFileOpener::None, cwd);
let rendered: Vec<String> = out
.iter()
.map(|l| {
l.spans
.iter()
.map(|s| s.content.clone())
.collect::<String>()
})
.collect();
assert_eq!(
rendered,
vec![
"Before".to_string(),
" code 1".to_string(),
"\tcode with tab".to_string(),
" code 2".to_string(),
"After".to_string()
]
);
}
#[test]
fn citations_not_rewritten_inside_indented_code_blocks() {
let src = "Start 【F:/x.rs†L1】\n\n Inside 【F:/x.rs†L2】\n\nEnd 【F:/x.rs†L3】\n";
let cwd = Path::new("/");
let mut out = Vec::new();
append_markdown_with_opener_and_cwd(src, &mut out, UriBasedFileOpener::VsCode, cwd);
let rendered: Vec<String> = out
.iter()
.map(|l| {
l.spans
.iter()
.map(|s| s.content.clone())
.collect::<String>()
})
.collect();
// Expect first and last lines rewritten, and the indented code line present
// unchanged (citations inside not rewritten). We do not assert on blank
// separator lines since the markdown renderer may normalize them.
assert!(rendered.iter().any(|s| s.contains("vscode://file")));
assert!(rendered.iter().any(|s| s == " Inside 【F:/x.rs†L2】"));
}
#[test]
fn append_markdown_preserves_full_text_line() {
use codex_core::config_types::UriBasedFileOpener;
use std::path::Path;
let src = "Hi! How can I help with codex-rs today? Want me to explore the repo, run tests, or work on a specific change?\n";
let cwd = Path::new("/");
let mut out = Vec::new();
append_markdown_with_opener_and_cwd(src, &mut out, UriBasedFileOpener::None, cwd);
assert_eq!(
out.len(),
1,
"expected a single rendered line for plain text"
);
let rendered: String = out
.iter()
.flat_map(|l| l.spans.iter())
.map(|s| s.content.clone())
.collect::<Vec<_>>()
.join("");
assert_eq!(
rendered,
"Hi! How can I help with codex-rs today? Want me to explore the repo, run tests, or work on a specific change?"
);
}
#[test]
fn tui_markdown_splits_ordered_marker_and_text() {
// With marker and content on the same line, tui_markdown keeps it as one line
// even in the surrounding section context.
let rendered = tui_markdown::from_str("Loose vs. tight list items:\n1. Tight item\n");
let lines: Vec<String> = rendered
.lines
.iter()
.map(|l| {
l.spans
.iter()
.map(|s| s.content.clone())
.collect::<String>()
})
.collect();
assert!(
lines.iter().any(|w| w == "1. Tight item"),
"expected single line '1. Tight item' in context: {lines:?}"
);
}
#[test]
fn append_markdown_matches_tui_markdown_for_ordered_item() {
use codex_core::config_types::UriBasedFileOpener;
use std::path::Path;
let cwd = Path::new("/");
let mut out = Vec::new();
append_markdown_with_opener_and_cwd(
"1. Tight item\n",
&mut out,
UriBasedFileOpener::None,
cwd,
);
let lines: Vec<String> = out
.iter()
.map(|l| {
l.spans
.iter()
.map(|s| s.content.clone())
.collect::<String>()
})
.collect();
assert_eq!(lines, vec!["1. Tight item".to_string()]);
}
#[test]
fn tui_markdown_shape_for_loose_tight_section() {
// Use the exact source from the session deltas used in tests.
let source = r#"
Loose vs. tight list items:
1. Tight item
2. Another tight item
3.
Loose item
"#;
let rendered = tui_markdown::from_str(source);
let lines: Vec<String> = rendered
.lines
.iter()
.map(|l| {
l.spans
.iter()
.map(|s| s.content.clone())
.collect::<String>()
})
.collect();
// Join into a single string and assert the exact shape we observe
// from tui_markdown in this larger context (marker and content split).
let joined = {
let mut s = String::new();
for (i, l) in lines.iter().enumerate() {
s.push_str(l);
if i + 1 < lines.len() {
s.push('\n');
}
}
s
};
let expected = r#"Loose vs. tight list items:
1.
Tight item
2.
Another tight item
3.
Loose item"#;
assert_eq!(
joined, expected,
"unexpected tui_markdown shape: {joined:?}"
);
}
#[test]
fn split_text_and_fences_keeps_ordered_list_line_as_text() {
// No fences here; expect a single Text segment containing the full input.
let src = "Loose vs. tight list items:\n1. Tight item\n";
let segs = super::split_text_and_fences(src);
assert_eq!(
segs.len(),
1,
"expected single text segment, got {}",
segs.len()
);
match &segs[0] {
super::Segment::Text(s) => assert_eq!(s, src),
_ => panic!("expected Text segment for non-fence input"),
}
}
#[test]
fn append_markdown_keeps_ordered_list_line_unsplit_in_context() {
use codex_core::config_types::UriBasedFileOpener;
use std::path::Path;
let src = "Loose vs. tight list items:\n1. Tight item\n";
let cwd = Path::new("/");
let mut out = Vec::new();
append_markdown_with_opener_and_cwd(src, &mut out, UriBasedFileOpener::None, cwd);
let lines: Vec<String> = out
.iter()
.map(|l| {
l.spans
.iter()
.map(|s| s.content.clone())
.collect::<String>()
})
.collect();
// Expect to find the ordered list line rendered as a single line,
// not split into a marker-only line followed by the text.
assert!(
lines.iter().any(|s| s == "1. Tight item"),
"expected '1. Tight item' rendered as a single line; got: {lines:?}"
);
assert!(
!lines
.windows(2)
.any(|w| w[0].trim_end() == "1." && w[1] == "Tight item"),
"did not expect a split into ['1.', 'Tight item']; got: {lines:?}"
);
}
}