feat(tui): switch to tree-sitter-highlight bash highlighting (#4666)
Use tree-sitter-highlight instead of custom logic over the tree-sitter tree to highlight Bash.
This commit is contained in:
18
codex-rs/Cargo.lock
generated
18
codex-rs/Cargo.lock
generated
@@ -1424,6 +1424,8 @@ dependencies = [
|
||||
"tracing",
|
||||
"tracing-appender",
|
||||
"tracing-subscriber",
|
||||
"tree-sitter-bash",
|
||||
"tree-sitter-highlight",
|
||||
"unicode-segmentation",
|
||||
"unicode-width 0.2.1",
|
||||
"url",
|
||||
@@ -6261,9 +6263,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter"
|
||||
version = "0.25.9"
|
||||
version = "0.25.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ccd2a058a86cfece0bf96f7cce1021efef9c8ed0e892ab74639173e5ed7a34fa"
|
||||
checksum = "78f873475d258561b06f1c595d93308a7ed124d9977cb26b148c2084a4a3cc87"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"regex",
|
||||
@@ -6283,6 +6285,18 @@ dependencies = [
|
||||
"tree-sitter-language",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-highlight"
|
||||
version = "0.25.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "adc5f880ad8d8f94e88cb81c3557024cf1a8b75e3b504c50481ed4f5a6006ff3"
|
||||
dependencies = [
|
||||
"regex",
|
||||
"streaming-iterator",
|
||||
"thiserror 2.0.16",
|
||||
"tree-sitter",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-language"
|
||||
version = "0.1.5"
|
||||
|
||||
@@ -175,8 +175,9 @@ tracing = "0.1.41"
|
||||
tracing-appender = "0.2.3"
|
||||
tracing-subscriber = "0.3.20"
|
||||
tracing-test = "0.2.5"
|
||||
tree-sitter = "0.25.9"
|
||||
tree-sitter-bash = "0.25.0"
|
||||
tree-sitter = "0.25.10"
|
||||
tree-sitter-bash = "0.25"
|
||||
tree-sitter-highlight = "0.25.10"
|
||||
ts-rs = "11"
|
||||
unicode-segmentation = "1.12.0"
|
||||
unicode-width = "0.2"
|
||||
|
||||
@@ -68,6 +68,8 @@ strum_macros = { workspace = true }
|
||||
supports-color = { workspace = true }
|
||||
tempfile = { workspace = true }
|
||||
textwrap = { workspace = true }
|
||||
tree-sitter-highlight = { workspace = true }
|
||||
tree-sitter-bash = { workspace = true }
|
||||
tokio = { workspace = true, features = [
|
||||
"io-std",
|
||||
"macros",
|
||||
|
||||
@@ -1,81 +1,146 @@
|
||||
use codex_core::bash::try_parse_bash;
|
||||
use ratatui::style::Style;
|
||||
use ratatui::style::Stylize;
|
||||
use ratatui::text::Line;
|
||||
use ratatui::text::Span;
|
||||
use std::sync::OnceLock;
|
||||
use tree_sitter_highlight::Highlight;
|
||||
use tree_sitter_highlight::HighlightConfiguration;
|
||||
use tree_sitter_highlight::HighlightEvent;
|
||||
use tree_sitter_highlight::Highlighter;
|
||||
|
||||
/// Convert the full bash script into per-line styled content by first
|
||||
/// computing operator-dimmed spans across the entire script, then splitting
|
||||
/// by newlines and dimming heredoc body lines. Performs a single parse and
|
||||
/// reuses it for both highlighting and heredoc detection.
|
||||
pub(crate) fn highlight_bash_to_lines(script: &str) -> Vec<Line<'static>> {
|
||||
// Parse once; use the tree for both highlighting and heredoc body detection.
|
||||
let spans: Vec<Span<'static>> = if let Some(tree) = try_parse_bash(script) {
|
||||
// Single walk: collect operator ranges and heredoc rows.
|
||||
let root = tree.root_node();
|
||||
let mut cursor = root.walk();
|
||||
let mut stack = vec![root];
|
||||
let mut ranges: Vec<(usize, usize)> = Vec::new();
|
||||
while let Some(node) = stack.pop() {
|
||||
if !node.is_named() && !node.is_extra() {
|
||||
let kind = node.kind();
|
||||
let is_quote = matches!(kind, "\"" | "'" | "`");
|
||||
let is_whitespace = kind.trim().is_empty();
|
||||
if !is_quote && !is_whitespace {
|
||||
ranges.push((node.start_byte(), node.end_byte()));
|
||||
}
|
||||
} else if node.kind() == "heredoc_body" {
|
||||
ranges.push((node.start_byte(), node.end_byte()));
|
||||
}
|
||||
for child in node.children(&mut cursor) {
|
||||
stack.push(child);
|
||||
}
|
||||
}
|
||||
if ranges.is_empty() {
|
||||
ranges.push((script.len(), script.len()));
|
||||
}
|
||||
ranges.sort_by_key(|(st, _)| *st);
|
||||
let mut spans: Vec<Span<'static>> = Vec::new();
|
||||
let mut i = 0usize;
|
||||
for (start, end) in ranges.into_iter() {
|
||||
let dim_start = start.max(i);
|
||||
let dim_end = end;
|
||||
if dim_start < dim_end {
|
||||
if dim_start > i {
|
||||
spans.push(script[i..dim_start].to_string().into());
|
||||
}
|
||||
spans.push(script[dim_start..dim_end].to_string().dim());
|
||||
i = dim_end;
|
||||
}
|
||||
}
|
||||
if i < script.len() {
|
||||
spans.push(script[i..].to_string().into());
|
||||
}
|
||||
spans
|
||||
} else {
|
||||
vec![script.to_string().into()]
|
||||
};
|
||||
// Split spans into lines preserving style boundaries and highlights across newlines.
|
||||
let mut lines: Vec<Line<'static>> = vec![Line::from("")];
|
||||
for sp in spans {
|
||||
let style = sp.style;
|
||||
let text = sp.content.into_owned();
|
||||
for (i, part) in text.split('\n').enumerate() {
|
||||
if i > 0 {
|
||||
lines.push(Line::from(""));
|
||||
}
|
||||
if part.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let span = Span {
|
||||
style,
|
||||
content: std::borrow::Cow::Owned(part.to_string()),
|
||||
};
|
||||
if let Some(last) = lines.last_mut() {
|
||||
last.spans.push(span);
|
||||
}
|
||||
/// Highlight categories recognised when styling bash scripts.
///
/// Each variant stands for one capture name (see `as_str`) that is registered
/// with the highlighter.
///
/// Ref: https://github.com/tree-sitter/tree-sitter-bash/blob/master/queries/highlights.scm
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum BashHighlight {
    /// The `comment` capture.
    Comment,
    /// The `constant` capture.
    Constant,
    /// The `embedded` capture.
    Embedded,
    /// The `function` capture.
    Function,
    /// The `keyword` capture.
    Keyword,
    /// The `number` capture.
    Number,
    /// The `operator` capture.
    Operator,
    /// The `property` capture.
    Property,
    /// The `string` capture.
    String,
}
|
||||
|
||||
impl BashHighlight {
|
||||
const ALL: [Self; 9] = [
|
||||
Self::Comment,
|
||||
Self::Constant,
|
||||
Self::Embedded,
|
||||
Self::Function,
|
||||
Self::Keyword,
|
||||
Self::Number,
|
||||
Self::Operator,
|
||||
Self::Property,
|
||||
Self::String,
|
||||
];
|
||||
|
||||
const fn as_str(self) -> &'static str {
|
||||
match self {
|
||||
Self::Comment => "comment",
|
||||
Self::Constant => "constant",
|
||||
Self::Embedded => "embedded",
|
||||
Self::Function => "function",
|
||||
Self::Keyword => "keyword",
|
||||
Self::Number => "number",
|
||||
Self::Operator => "operator",
|
||||
Self::Property => "property",
|
||||
Self::String => "string",
|
||||
}
|
||||
}
|
||||
lines
|
||||
|
||||
fn style(self) -> Style {
|
||||
match self {
|
||||
Self::Comment | Self::Operator | Self::String => Style::default().dim(),
|
||||
_ => Style::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Process-wide cache for the bash highlight configuration; it starts empty
/// and is filled on first use by `highlight_config`.
static HIGHLIGHT_CONFIG: OnceLock<HighlightConfiguration> = OnceLock::new();
|
||||
|
||||
fn highlight_names() -> &'static [&'static str] {
|
||||
static NAMES: OnceLock<[&'static str; BashHighlight::ALL.len()]> = OnceLock::new();
|
||||
NAMES
|
||||
.get_or_init(|| BashHighlight::ALL.map(BashHighlight::as_str))
|
||||
.as_slice()
|
||||
}
|
||||
|
||||
fn highlight_config() -> &'static HighlightConfiguration {
|
||||
HIGHLIGHT_CONFIG.get_or_init(|| {
|
||||
let language = tree_sitter_bash::LANGUAGE.into();
|
||||
#[expect(clippy::expect_used)]
|
||||
let mut config = HighlightConfiguration::new(
|
||||
language,
|
||||
"bash",
|
||||
tree_sitter_bash::HIGHLIGHT_QUERY,
|
||||
"",
|
||||
"",
|
||||
)
|
||||
.expect("load bash highlight query");
|
||||
config.configure(highlight_names());
|
||||
config
|
||||
})
|
||||
}
|
||||
|
||||
fn highlight_for(highlight: Highlight) -> BashHighlight {
|
||||
BashHighlight::ALL[highlight.0]
|
||||
}
|
||||
|
||||
fn push_segment(lines: &mut Vec<Line<'static>>, segment: &str, style: Option<Style>) {
|
||||
for (i, part) in segment.split('\n').enumerate() {
|
||||
if i > 0 {
|
||||
lines.push(Line::from(""));
|
||||
}
|
||||
if part.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let span = match style {
|
||||
Some(style) => Span::styled(part.to_string(), style),
|
||||
None => part.to_string().into(),
|
||||
};
|
||||
if let Some(last) = lines.last_mut() {
|
||||
last.spans.push(span);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert a bash script into per-line styled content using tree-sitter's
|
||||
/// bash highlight query. The highlighter is streamed so multi-line content is
|
||||
/// split into `Line`s while preserving style boundaries.
|
||||
pub(crate) fn highlight_bash_to_lines(script: &str) -> Vec<Line<'static>> {
|
||||
let mut highlighter = Highlighter::new();
|
||||
let iterator =
|
||||
match highlighter.highlight(highlight_config(), script.as_bytes(), None, |_| None) {
|
||||
Ok(iter) => iter,
|
||||
Err(_) => return vec![script.to_string().into()],
|
||||
};
|
||||
|
||||
let mut lines: Vec<Line<'static>> = vec![Line::from("")];
|
||||
let mut highlight_stack: Vec<Highlight> = Vec::new();
|
||||
|
||||
for event in iterator {
|
||||
match event {
|
||||
Ok(HighlightEvent::HighlightStart(highlight)) => highlight_stack.push(highlight),
|
||||
Ok(HighlightEvent::HighlightEnd) => {
|
||||
highlight_stack.pop();
|
||||
}
|
||||
Ok(HighlightEvent::Source { start, end }) => {
|
||||
if start == end {
|
||||
continue;
|
||||
}
|
||||
let style = highlight_stack.last().map(|h| highlight_for(*h).style());
|
||||
push_segment(&mut lines, &script[start..end], style);
|
||||
}
|
||||
Err(_) => return vec![script.to_string().into()],
|
||||
}
|
||||
}
|
||||
|
||||
if lines.is_empty() {
|
||||
vec![Line::from("")]
|
||||
} else {
|
||||
lines
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -84,62 +149,88 @@ mod tests {
|
||||
use pretty_assertions::assert_eq;
|
||||
use ratatui::style::Modifier;
|
||||
|
||||
fn reconstructed(lines: &[Line<'static>]) -> String {
|
||||
lines
|
||||
.iter()
|
||||
.map(|l| {
|
||||
l.spans
|
||||
.iter()
|
||||
.map(|sp| sp.content.clone())
|
||||
.collect::<String>()
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n")
|
||||
}
|
||||
|
||||
fn dimmed_tokens(lines: &[Line<'static>]) -> Vec<String> {
|
||||
lines
|
||||
.iter()
|
||||
.flat_map(|l| l.spans.iter())
|
||||
.filter(|sp| sp.style.add_modifier.contains(Modifier::DIM))
|
||||
.map(|sp| sp.content.clone().into_owned())
|
||||
.map(|token| token.trim().to_string())
|
||||
.filter(|token| !token.is_empty())
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn dims_expected_bash_operators() {
|
||||
let s = "echo foo && bar || baz | qux & (echo hi)";
|
||||
let lines = highlight_bash_to_lines(s);
|
||||
let reconstructed: String = lines
|
||||
.iter()
|
||||
.map(|l| {
|
||||
l.spans
|
||||
.iter()
|
||||
.map(|sp| sp.content.clone())
|
||||
.collect::<String>()
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
assert_eq!(reconstructed, s);
|
||||
assert_eq!(reconstructed(&lines), s);
|
||||
|
||||
fn is_dim(span: &Span<'_>) -> bool {
|
||||
span.style.add_modifier.contains(Modifier::DIM)
|
||||
}
|
||||
let dimmed: Vec<String> = lines
|
||||
.iter()
|
||||
.flat_map(|l| l.spans.iter())
|
||||
.filter(|sp| is_dim(sp))
|
||||
.map(|sp| sp.content.clone().into_owned())
|
||||
.collect();
|
||||
assert_eq!(dimmed, vec!["&&", "||", "|", "&", "(", ")"]);
|
||||
let dimmed = dimmed_tokens(&lines);
|
||||
assert!(dimmed.contains(&"&&".to_string()));
|
||||
assert!(dimmed.contains(&"|".to_string()));
|
||||
assert!(!dimmed.contains(&"echo".to_string()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn does_not_dim_quotes_but_dims_other_punct() {
|
||||
fn dims_redirects_and_strings() {
|
||||
let s = "echo \"hi\" > out.txt; echo 'ok'";
|
||||
let lines = highlight_bash_to_lines(s);
|
||||
let reconstructed: String = lines
|
||||
.iter()
|
||||
.map(|l| {
|
||||
l.spans
|
||||
.iter()
|
||||
.map(|sp| sp.content.clone())
|
||||
.collect::<String>()
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
assert_eq!(reconstructed, s);
|
||||
assert_eq!(reconstructed(&lines), s);
|
||||
|
||||
fn is_dim(span: &Span<'_>) -> bool {
|
||||
span.style.add_modifier.contains(Modifier::DIM)
|
||||
}
|
||||
let dimmed: Vec<String> = lines
|
||||
.iter()
|
||||
.flat_map(|l| l.spans.iter())
|
||||
.filter(|sp| is_dim(sp))
|
||||
.map(|sp| sp.content.clone().into_owned())
|
||||
.collect();
|
||||
let dimmed = dimmed_tokens(&lines);
|
||||
assert!(dimmed.contains(&">".to_string()));
|
||||
assert!(dimmed.contains(&";".to_string()));
|
||||
assert!(!dimmed.contains(&"\"".to_string()));
|
||||
assert!(!dimmed.contains(&"'".to_string()));
|
||||
assert!(dimmed.contains(&"\"hi\"".to_string()));
|
||||
assert!(dimmed.contains(&"'ok'".to_string()));
|
||||
}
|
||||
|
||||
/// The command name keeps the default style while the quoted string is
/// dimmed.
#[test]
fn highlights_command_and_strings() {
    let s = "echo \"hi\"";
    let lines = highlight_bash_to_lines(s);

    // Style of the last span on the first line whose text equals `needle`.
    let style_of = |needle: &str| {
        lines[0]
            .spans
            .iter()
            .filter(|span| span.content.as_ref() == needle)
            .map(|span| span.style)
            .last()
    };

    let echo_style = style_of("echo").expect("echo span missing");
    let string_style = style_of("\"hi\"").expect("string span missing");

    assert!(echo_style.fg.is_none());
    assert!(!echo_style.add_modifier.contains(Modifier::DIM));
    assert!(string_style.add_modifier.contains(Modifier::DIM));
}
|
||||
|
||||
/// The heredoc body sits on its own line and is dimmed.
#[test]
fn highlights_heredoc_body_as_string() {
    let s = "cat <<EOF\nheredoc body\nEOF";
    let lines = highlight_bash_to_lines(s);

    let body_style = lines[1]
        .spans
        .iter()
        .filter(|span| span.content.as_ref() == "heredoc body")
        .map(|span| span.style)
        .last()
        .expect("missing heredoc span");

    assert!(body_style.add_modifier.contains(Modifier::DIM));
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user