no functional change, just simplifying ratatui styling and adding guidance in AGENTS.md for future.
582 lines
21 KiB
Rust
582 lines
21 KiB
Rust
use crate::citation_regex::CITATION_REGEX;
|
||
use codex_core::config::Config;
|
||
use codex_core::config_types::UriBasedFileOpener;
|
||
use ratatui::text::Line;
|
||
use std::borrow::Cow;
|
||
use std::path::Path;
|
||
|
||
pub(crate) fn append_markdown(
|
||
markdown_source: &str,
|
||
lines: &mut Vec<Line<'static>>,
|
||
config: &Config,
|
||
) {
|
||
append_markdown_with_opener_and_cwd(markdown_source, lines, config.file_opener, &config.cwd);
|
||
}
|
||
|
||
fn append_markdown_with_opener_and_cwd(
|
||
markdown_source: &str,
|
||
lines: &mut Vec<Line<'static>>,
|
||
file_opener: UriBasedFileOpener,
|
||
cwd: &Path,
|
||
) {
|
||
// Historically, we fed the entire `markdown_source` into the renderer in
|
||
// one pass. However, fenced code blocks sometimes lost leading whitespace
|
||
// when formatted by the markdown renderer/highlighter. To preserve code
|
||
// block content exactly, split the source into "text" and "code" segments:
|
||
// - Render non-code text through `tui_markdown` (with citation rewrite).
|
||
// - Render code block content verbatim as plain lines without additional
|
||
// formatting, preserving leading spaces.
|
||
for seg in split_text_and_fences(markdown_source) {
|
||
match seg {
|
||
Segment::Text(s) => {
|
||
let processed = rewrite_file_citations(&s, file_opener, cwd);
|
||
let rendered = tui_markdown::from_str(&processed);
|
||
crate::render::line_utils::push_owned_lines(&rendered.lines, lines);
|
||
}
|
||
Segment::Code { content, .. } => {
|
||
// Emit the code content exactly as-is, line by line.
|
||
// We don't attempt syntax highlighting to avoid whitespace bugs.
|
||
for line in content.split_inclusive('\n') {
|
||
// split_inclusive keeps the trailing \n; we want lines without it.
|
||
let line = if let Some(stripped) = line.strip_suffix('\n') {
|
||
stripped
|
||
} else {
|
||
line
|
||
};
|
||
let owned_line: Line<'static> = line.to_string().into();
|
||
lines.push(owned_line);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
/// Rewrites file citations in `src` into markdown hyperlinks using the
|
||
/// provided `scheme` (`vscode`, `cursor`, etc.). The resulting URI follows the
|
||
/// format expected by VS Code-compatible file openers:
|
||
///
|
||
/// ```text
|
||
/// <scheme>://file<ABS_PATH>:<LINE>
|
||
/// ```
|
||
fn rewrite_file_citations<'a>(
|
||
src: &'a str,
|
||
file_opener: UriBasedFileOpener,
|
||
cwd: &Path,
|
||
) -> Cow<'a, str> {
|
||
// Map enum values to the corresponding URI scheme strings.
|
||
let scheme: &str = match file_opener.get_scheme() {
|
||
Some(scheme) => scheme,
|
||
None => return Cow::Borrowed(src),
|
||
};
|
||
|
||
CITATION_REGEX.replace_all(src, |caps: ®ex_lite::Captures<'_>| {
|
||
let file = &caps[1];
|
||
let start_line = &caps[2];
|
||
|
||
// Resolve the path against `cwd` when it is relative.
|
||
let absolute_path = {
|
||
let p = Path::new(file);
|
||
let absolute_path = if p.is_absolute() {
|
||
path_clean::clean(p)
|
||
} else {
|
||
path_clean::clean(cwd.join(p))
|
||
};
|
||
// VS Code expects forward slashes even on Windows because URIs use
|
||
// `/` as the path separator.
|
||
absolute_path.to_string_lossy().replace('\\', "/")
|
||
};
|
||
|
||
// Render as a normal markdown link so the downstream renderer emits
|
||
// the hyperlink escape sequence (when supported by the terminal).
|
||
//
|
||
// In practice, sometimes multiple citations for the same file, but with a
|
||
// different line number, are shown sequentially, so we:
|
||
// - include the line number in the label to disambiguate them
|
||
// - add a space after the link to make it easier to read
|
||
format!("[{file}:{start_line}]({scheme}://file{absolute_path}:{start_line}) ")
|
||
})
|
||
}
|
||
|
||
// use shared helper from `line_utils`
|
||
|
||
// Minimal code block splitting.
// - Recognizes fenced blocks opened by ``` or ~~~ (allowing leading whitespace).
//   The opening fence may include a language string which we ignore.
//   The closing fence must be on its own line (ignoring surrounding whitespace).
// - Additionally recognizes indented code blocks: any line starting with at
//   least 4 spaces or a tab opens one (a preceding blank line is NOT required,
//   since streamed model output often omits it), and the block continues for
//   consecutive lines that are blank or also indented by >= 4 spaces or a tab.

/// One contiguous piece of markdown source produced by `split_text_and_fences`.
enum Segment {
    /// Markdown that lies outside any code block.
    Text(String),
    /// The body of a fenced or indented code block.
    Code {
        /// Language string captured after the opening fence, if any. It is
        /// always `None` for indented blocks. The consumer in this file does
        /// not read it, hence the underscore prefix.
        _lang: Option<String>,
        /// The code lines with their original line endings; fence lines are
        /// not included.
        content: String,
    },
}
|
||
|
||
fn split_text_and_fences(src: &str) -> Vec<Segment> {
|
||
let mut segments = Vec::new();
|
||
let mut curr_text = String::new();
|
||
#[derive(Copy, Clone, PartialEq)]
|
||
enum CodeMode {
|
||
None,
|
||
Fenced,
|
||
Indented,
|
||
}
|
||
let mut code_mode = CodeMode::None;
|
||
let mut fence_token = "";
|
||
let mut code_lang: Option<String> = None;
|
||
let mut code_content = String::new();
|
||
// We intentionally do not require a preceding blank line for indented code blocks,
|
||
// since streamed model output often omits it. This favors preserving indentation.
|
||
|
||
for line in src.split_inclusive('\n') {
|
||
let line_no_nl = line.strip_suffix('\n');
|
||
let trimmed_start = match line_no_nl {
|
||
Some(l) => l.trim_start(),
|
||
None => line.trim_start(),
|
||
};
|
||
if code_mode == CodeMode::None {
|
||
let open = if trimmed_start.starts_with("```") {
|
||
Some("```")
|
||
} else if trimmed_start.starts_with("~~~") {
|
||
Some("~~~")
|
||
} else {
|
||
None
|
||
};
|
||
if let Some(tok) = open {
|
||
// Flush pending text segment.
|
||
if !curr_text.is_empty() {
|
||
segments.push(Segment::Text(curr_text.clone()));
|
||
curr_text.clear();
|
||
}
|
||
fence_token = tok;
|
||
// Capture language after the token on this line (before newline).
|
||
let after = &trimmed_start[tok.len()..];
|
||
let lang = after.trim();
|
||
code_lang = if lang.is_empty() {
|
||
None
|
||
} else {
|
||
Some(lang.to_string())
|
||
};
|
||
code_mode = CodeMode::Fenced;
|
||
code_content.clear();
|
||
// Do not include the opening fence line in output.
|
||
continue;
|
||
}
|
||
// Check for start of an indented code block: only after a blank line
|
||
// (or at the beginning), and the line must start with >=4 spaces or a tab.
|
||
let raw_line = match line_no_nl {
|
||
Some(l) => l,
|
||
None => line,
|
||
};
|
||
let leading_spaces = raw_line.chars().take_while(|c| *c == ' ').count();
|
||
let starts_with_tab = raw_line.starts_with('\t');
|
||
// Consider any line that begins with >=4 spaces or a tab to start an
|
||
// indented code block. This favors preserving indentation even when a
|
||
// preceding blank line is omitted (common in streamed model output).
|
||
let starts_indented_code = (leading_spaces >= 4) || starts_with_tab;
|
||
if starts_indented_code {
|
||
// Flush pending text and begin an indented code block.
|
||
if !curr_text.is_empty() {
|
||
segments.push(Segment::Text(curr_text.clone()));
|
||
curr_text.clear();
|
||
}
|
||
code_mode = CodeMode::Indented;
|
||
code_content.clear();
|
||
code_content.push_str(line);
|
||
// Inside code now; do not treat this line as normal text.
|
||
continue;
|
||
}
|
||
// Normal text line.
|
||
curr_text.push_str(line);
|
||
} else {
|
||
match code_mode {
|
||
CodeMode::Fenced => {
|
||
// inside fenced code: check for closing fence on its own line
|
||
let trimmed = match line_no_nl {
|
||
Some(l) => l.trim(),
|
||
None => line.trim(),
|
||
};
|
||
if trimmed == fence_token {
|
||
// End code block: emit segment without fences
|
||
segments.push(Segment::Code {
|
||
_lang: code_lang.take(),
|
||
content: code_content.clone(),
|
||
});
|
||
code_content.clear();
|
||
code_mode = CodeMode::None;
|
||
fence_token = "";
|
||
continue;
|
||
}
|
||
// Accumulate code content exactly as-is.
|
||
code_content.push_str(line);
|
||
}
|
||
CodeMode::Indented => {
|
||
// Continue while the line is blank, or starts with >=4 spaces, or a tab.
|
||
let raw_line = match line_no_nl {
|
||
Some(l) => l,
|
||
None => line,
|
||
};
|
||
let is_blank = raw_line.trim().is_empty();
|
||
let leading_spaces = raw_line.chars().take_while(|c| *c == ' ').count();
|
||
let starts_with_tab = raw_line.starts_with('\t');
|
||
if is_blank || leading_spaces >= 4 || starts_with_tab {
|
||
code_content.push_str(line);
|
||
} else {
|
||
// Close the indented code block and reprocess this line as normal text.
|
||
segments.push(Segment::Code {
|
||
_lang: None,
|
||
content: code_content.clone(),
|
||
});
|
||
code_content.clear();
|
||
code_mode = CodeMode::None;
|
||
// Now handle current line as text.
|
||
curr_text.push_str(line);
|
||
}
|
||
}
|
||
CodeMode::None => unreachable!(),
|
||
}
|
||
}
|
||
}
|
||
|
||
if code_mode != CodeMode::None {
|
||
// Unterminated code fence: treat accumulated content as a code segment.
|
||
segments.push(Segment::Code {
|
||
_lang: code_lang.take(),
|
||
content: code_content.clone(),
|
||
});
|
||
} else if !curr_text.is_empty() {
|
||
segments.push(Segment::Text(curr_text.clone()));
|
||
}
|
||
|
||
segments
|
||
}
|
||
|
||
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Flattens every rendered line into the concatenated text of its spans.
    fn plain_lines(lines: &[Line<'_>]) -> Vec<String> {
        lines
            .iter()
            .map(|line| {
                line.spans
                    .iter()
                    .map(|span| span.content.clone())
                    .collect::<String>()
            })
            .collect()
    }

    /// Runs the append pipeline on `src` with `/` as the working directory
    /// and returns the plain text of each produced line.
    fn render(src: &str, file_opener: UriBasedFileOpener) -> Vec<String> {
        let mut out = Vec::new();
        append_markdown_with_opener_and_cwd(src, &mut out, file_opener, Path::new("/"));
        plain_lines(&out)
    }

    #[test]
    fn citation_is_rewritten_with_absolute_path() {
        let markdown = "See 【F:/src/main.rs†L42-L50】 for details.";
        let cwd = Path::new("/workspace");
        let result = rewrite_file_citations(markdown, UriBasedFileOpener::VsCode, cwd);

        // The rewrite appends one space after the link, and the source already
        // has a space after the citation, hence two spaces here.
        assert_eq!(
            "See [/src/main.rs:42](vscode://file/src/main.rs:42)  for details.",
            result
        );
    }

    #[test]
    fn citation_is_rewritten_with_relative_path() {
        let markdown = "Refer to 【F:lib/mod.rs†L5】 here.";
        let cwd = Path::new("/home/user/project");
        let result = rewrite_file_citations(markdown, UriBasedFileOpener::Windsurf, cwd);

        // Two spaces after the link: one added by the rewrite, one from the source.
        assert_eq!(
            "Refer to [lib/mod.rs:5](windsurf://file/home/user/project/lib/mod.rs:5)  here.",
            result
        );
    }

    #[test]
    fn citation_followed_by_space_so_they_do_not_run_together() {
        let markdown = "References on lines 【F:src/foo.rs†L24】【F:src/foo.rs†L42】";
        let cwd = Path::new("/home/user/project");
        let result = rewrite_file_citations(markdown, UriBasedFileOpener::VsCode, cwd);

        assert_eq!(
            "References on lines [src/foo.rs:24](vscode://file/home/user/project/src/foo.rs:24) [src/foo.rs:42](vscode://file/home/user/project/src/foo.rs:42) ",
            result
        );
    }

    #[test]
    fn citation_unchanged_without_file_opener() {
        let markdown = "Look at 【F:file.rs†L1】.";
        let cwd = Path::new("/");
        let rewritten = rewrite_file_citations(markdown, UriBasedFileOpener::VsCode, cwd);

        // With an opener that has a scheme the helper rewrites; this test
        // validates that rendering with `UriBasedFileOpener::None` leaves the
        // citation text untouched.
        let rendered = render(markdown, UriBasedFileOpener::None).concat();
        assert_eq!(markdown, rendered);

        // Ensure the helper itself does rewrite when a scheme is available.
        assert_ne!(markdown, rewritten);
    }

    #[test]
    fn fenced_code_blocks_preserve_leading_whitespace() {
        let src = "```\n  indented\n\t\twith tabs\n    four spaces\n```\n";
        let rendered = render(src, UriBasedFileOpener::None);

        assert_eq!(
            rendered,
            vec![
                "  indented".to_string(),
                "\t\twith tabs".to_string(),
                "    four spaces".to_string()
            ]
        );
    }

    #[test]
    fn citations_not_rewritten_inside_code_blocks() {
        let src = "Before 【F:/x.rs†L1】\n```\nInside 【F:/x.rs†L2】\n```\nAfter 【F:/x.rs†L3】\n";
        let rendered = render(src, UriBasedFileOpener::VsCode);

        // Expect first and last lines rewritten, middle line unchanged.
        assert!(rendered[0].contains("vscode://file"));
        assert_eq!(rendered[1], "Inside 【F:/x.rs†L2】");
        assert!(matches!(rendered.last(), Some(s) if s.contains("vscode://file")));
    }

    #[test]
    fn indented_code_blocks_preserve_leading_whitespace() {
        // Code lines need >= 4 leading spaces (or a tab) to count as indented code.
        let src = "Before\n    code 1\n\tcode with tab\n    code 2\nAfter\n";
        let rendered = render(src, UriBasedFileOpener::None);

        assert_eq!(
            rendered,
            vec![
                "Before".to_string(),
                "    code 1".to_string(),
                "\tcode with tab".to_string(),
                "    code 2".to_string(),
                "After".to_string()
            ]
        );
    }

    #[test]
    fn citations_not_rewritten_inside_indented_code_blocks() {
        let src = "Start 【F:/x.rs†L1】\n\n    Inside 【F:/x.rs†L2】\n\nEnd 【F:/x.rs†L3】\n";
        let rendered = render(src, UriBasedFileOpener::VsCode);

        // Expect the surrounding text rewritten, and the indented code line
        // present unchanged (citations inside not rewritten). We do not assert
        // on blank separator lines since the markdown renderer may normalize
        // them.
        assert!(rendered.iter().any(|s| s.contains("vscode://file")));
        assert!(rendered.iter().any(|s| s == "    Inside 【F:/x.rs†L2】"));
    }

    #[test]
    fn append_markdown_preserves_full_text_line() {
        let src = "Hi! How can I help with codex-rs today? Want me to explore the repo, run tests, or work on a specific change?\n";
        let rendered = render(src, UriBasedFileOpener::None);

        assert_eq!(
            rendered.len(),
            1,
            "expected a single rendered line for plain text"
        );
        assert_eq!(
            rendered.concat(),
            "Hi! How can I help with codex-rs today? Want me to explore the repo, run tests, or work on a specific change?"
        );
    }

    #[test]
    fn tui_markdown_splits_ordered_marker_and_text() {
        // With marker and content on the same line, tui_markdown keeps it as one line
        // even in the surrounding section context.
        let rendered = tui_markdown::from_str("Loose vs. tight list items:\n1. Tight item\n");
        let lines = plain_lines(&rendered.lines);

        assert!(
            lines.iter().any(|w| w == "1. Tight item"),
            "expected single line '1. Tight item' in context: {lines:?}"
        );
    }

    #[test]
    fn append_markdown_matches_tui_markdown_for_ordered_item() {
        let lines = render("1. Tight item\n", UriBasedFileOpener::None);
        assert_eq!(lines, vec!["1. Tight item".to_string()]);
    }

    #[test]
    fn tui_markdown_shape_for_loose_tight_section() {
        // Use the exact source from the session deltas used in tests.
        // NOTE(review): leading/trailing whitespace inside the two raw strings
        // below is significant to this assertion - confirm both against the
        // actual tui_markdown output before relying on this test.
        let source = r#"
Loose vs. tight list items:
1. Tight item
2. Another tight item

3.
Loose item
"#;

        let rendered = tui_markdown::from_str(source);

        // Join into a single string and assert the exact shape we observe
        // from tui_markdown in this larger context (marker and content split).
        let joined = plain_lines(&rendered.lines).join("\n");
        let expected = r#"Loose vs. tight list items:

1.
Tight item
2.
Another tight item
3.
Loose item"#;
        assert_eq!(
            joined, expected,
            "unexpected tui_markdown shape: {joined:?}"
        );
    }

    #[test]
    fn split_text_and_fences_keeps_ordered_list_line_as_text() {
        // No fences here; expect a single Text segment containing the full input.
        let src = "Loose vs. tight list items:\n1. Tight item\n";
        let segs = split_text_and_fences(src);
        assert_eq!(
            segs.len(),
            1,
            "expected single text segment, got {}",
            segs.len()
        );
        match &segs[0] {
            Segment::Text(s) => assert_eq!(s, src),
            _ => panic!("expected Text segment for non-fence input"),
        }
    }

    #[test]
    fn append_markdown_keeps_ordered_list_line_unsplit_in_context() {
        let src = "Loose vs. tight list items:\n1. Tight item\n";
        let lines = render(src, UriBasedFileOpener::None);

        // Expect to find the ordered list line rendered as a single line,
        // not split into a marker-only line followed by the text.
        assert!(
            lines.iter().any(|s| s == "1. Tight item"),
            "expected '1. Tight item' rendered as a single line; got: {lines:?}"
        );
        assert!(
            !lines
                .windows(2)
                .any(|w| w[0].trim_end() == "1." && w[1] == "Tight item"),
            "did not expect a split into ['1.', 'Tight item']; got: {lines:?}"
        );
    }
}
|