feat(tui): switch to tree-sitter-highlight bash highlighting (#4666)

Use tree-sitter-highlight to highlight bash, replacing the custom logic
that walked the tree-sitter parse tree.
This commit is contained in:
Jeremy Rose
2025-10-07 16:20:12 -07:00
committed by GitHub
parent 0e5d72cc57
commit b8b04514bc
4 changed files with 229 additions and 121 deletions

18
codex-rs/Cargo.lock generated
View File

@@ -1424,6 +1424,8 @@ dependencies = [
"tracing",
"tracing-appender",
"tracing-subscriber",
"tree-sitter-bash",
"tree-sitter-highlight",
"unicode-segmentation",
"unicode-width 0.2.1",
"url",
@@ -6261,9 +6263,9 @@ dependencies = [
[[package]]
name = "tree-sitter"
version = "0.25.9"
version = "0.25.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ccd2a058a86cfece0bf96f7cce1021efef9c8ed0e892ab74639173e5ed7a34fa"
checksum = "78f873475d258561b06f1c595d93308a7ed124d9977cb26b148c2084a4a3cc87"
dependencies = [
"cc",
"regex",
@@ -6283,6 +6285,18 @@ dependencies = [
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-highlight"
version = "0.25.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "adc5f880ad8d8f94e88cb81c3557024cf1a8b75e3b504c50481ed4f5a6006ff3"
dependencies = [
"regex",
"streaming-iterator",
"thiserror 2.0.16",
"tree-sitter",
]
[[package]]
name = "tree-sitter-language"
version = "0.1.5"

View File

@@ -175,8 +175,9 @@ tracing = "0.1.41"
tracing-appender = "0.2.3"
tracing-subscriber = "0.3.20"
tracing-test = "0.2.5"
tree-sitter = "0.25.9"
tree-sitter-bash = "0.25.0"
tree-sitter = "0.25.10"
tree-sitter-bash = "0.25"
tree-sitter-highlight = "0.25.10"
ts-rs = "11"
unicode-segmentation = "1.12.0"
unicode-width = "0.2"

View File

@@ -68,6 +68,8 @@ strum_macros = { workspace = true }
supports-color = { workspace = true }
tempfile = { workspace = true }
textwrap = { workspace = true }
tree-sitter-highlight = { workspace = true }
tree-sitter-bash = { workspace = true }
tokio = { workspace = true, features = [
"io-std",
"macros",

View File

@@ -1,81 +1,146 @@
use codex_core::bash::try_parse_bash;
use ratatui::style::Style;
use ratatui::style::Stylize;
use ratatui::text::Line;
use ratatui::text::Span;
use std::sync::OnceLock;
use tree_sitter_highlight::Highlight;
use tree_sitter_highlight::HighlightConfiguration;
use tree_sitter_highlight::HighlightEvent;
use tree_sitter_highlight::Highlighter;
/// Convert the full bash script into per-line styled content by first
/// computing operator-dimmed spans across the entire script, then splitting
/// by newlines and dimming heredoc body lines. Performs a single parse and
/// reuses it for both highlighting and heredoc detection.
pub(crate) fn highlight_bash_to_lines(script: &str) -> Vec<Line<'static>> {
// Parse once; use the tree for both highlighting and heredoc body detection.
let spans: Vec<Span<'static>> = if let Some(tree) = try_parse_bash(script) {
// Single walk: collect operator ranges and heredoc rows.
let root = tree.root_node();
let mut cursor = root.walk();
let mut stack = vec![root];
let mut ranges: Vec<(usize, usize)> = Vec::new();
while let Some(node) = stack.pop() {
if !node.is_named() && !node.is_extra() {
let kind = node.kind();
let is_quote = matches!(kind, "\"" | "'" | "`");
let is_whitespace = kind.trim().is_empty();
if !is_quote && !is_whitespace {
ranges.push((node.start_byte(), node.end_byte()));
}
} else if node.kind() == "heredoc_body" {
ranges.push((node.start_byte(), node.end_byte()));
}
for child in node.children(&mut cursor) {
stack.push(child);
}
}
if ranges.is_empty() {
ranges.push((script.len(), script.len()));
}
ranges.sort_by_key(|(st, _)| *st);
let mut spans: Vec<Span<'static>> = Vec::new();
let mut i = 0usize;
for (start, end) in ranges.into_iter() {
let dim_start = start.max(i);
let dim_end = end;
if dim_start < dim_end {
if dim_start > i {
spans.push(script[i..dim_start].to_string().into());
}
spans.push(script[dim_start..dim_end].to_string().dim());
i = dim_end;
}
}
if i < script.len() {
spans.push(script[i..].to_string().into());
}
spans
} else {
vec![script.to_string().into()]
};
// Split spans into lines preserving style boundaries and highlights across newlines.
let mut lines: Vec<Line<'static>> = vec![Line::from("")];
for sp in spans {
let style = sp.style;
let text = sp.content.into_owned();
for (i, part) in text.split('\n').enumerate() {
if i > 0 {
lines.push(Line::from(""));
}
if part.is_empty() {
continue;
}
let span = Span {
style,
content: std::borrow::Cow::Owned(part.to_string()),
};
if let Some(last) = lines.last_mut() {
last.spans.push(span);
}
// Ref: https://github.com/tree-sitter/tree-sitter-bash/blob/master/queries/highlights.scm
/// Highlight categories this module distinguishes when styling bash.
///
/// Each variant has a lowercase name (see `as_str`); those names are the
/// ones handed to the highlight configuration, so a `Highlight` index
/// reported by the highlighter can be mapped back to a variant via `ALL`.
#[derive(Copy, Clone)]
enum BashHighlight {
Comment,
Constant,
Embedded,
Function,
Keyword,
Number,
Operator,
Property,
String,
}
impl BashHighlight {
/// Every variant, in a fixed order.
///
/// The position of a variant in this array is significant: `highlight_names`
/// derives the configured name list from it, and `highlight_for` indexes it
/// with the `Highlight` index, so the two stay aligned only while both use
/// this same ordering.
const ALL: [Self; 9] = [
Self::Comment,
Self::Constant,
Self::Embedded,
Self::Function,
Self::Keyword,
Self::Number,
Self::Operator,
Self::Property,
Self::String,
];
/// Returns the lowercase name associated with this variant.
///
/// These strings are what `highlight_names` collects and passes to
/// `HighlightConfiguration::configure`.
const fn as_str(self) -> &'static str {
match self {
Self::Comment => "comment",
Self::Constant => "constant",
Self::Embedded => "embedded",
Self::Function => "function",
Self::Keyword => "keyword",
Self::Number => "number",
Self::Operator => "operator",
Self::Property => "property",
Self::String => "string",
}
}
lines
/// Returns the terminal style used to render this highlight category.
///
/// Comments, operators and strings are dimmed; every other category uses
/// the default style.
fn style(self) -> Style {
    let dimmed = matches!(self, Self::Comment | Self::Operator | Self::String);
    if dimmed {
        Style::default().dim()
    } else {
        Style::default()
    }
}
}
/// Lazily initialised bash highlight configuration; filled in on first use
/// by `highlight_config`.
static HIGHLIGHT_CONFIG: OnceLock<HighlightConfiguration> = OnceLock::new();
/// Returns the highlight names in the same order as `BashHighlight::ALL`.
///
/// The table is computed once and cached for the lifetime of the process.
fn highlight_names() -> &'static [&'static str] {
    type NameTable = [&'static str; BashHighlight::ALL.len()];
    static NAMES: OnceLock<NameTable> = OnceLock::new();

    let table = NAMES.get_or_init(|| BashHighlight::ALL.map(|kind| kind.as_str()));
    &table[..]
}
/// Returns the shared bash `HighlightConfiguration`, building it on first call.
///
/// The configuration is created from the bash grammar and its bundled
/// highlight query (no injection or locals queries) and is configured with
/// the names from `highlight_names`.
///
/// # Panics
///
/// Panics if the bundled highlight query fails to load.
fn highlight_config() -> &'static HighlightConfiguration {
    /// Constructs and configures the bash highlight configuration.
    fn build() -> HighlightConfiguration {
        #[expect(clippy::expect_used)]
        let mut bash_config = HighlightConfiguration::new(
            tree_sitter_bash::LANGUAGE.into(),
            "bash",
            tree_sitter_bash::HIGHLIGHT_QUERY,
            "",
            "",
        )
        .expect("load bash highlight query");
        bash_config.configure(highlight_names());
        bash_config
    }

    HIGHLIGHT_CONFIG.get_or_init(build)
}
fn highlight_for(highlight: Highlight) -> BashHighlight {
BashHighlight::ALL[highlight.0]
}
/// Appends `segment` to `lines`, starting a new `Line` at every `\n`.
///
/// The text before the first newline is appended to the current last line;
/// each following piece goes onto a freshly pushed line. Empty pieces add no
/// span. When `style` is `Some`, spans carry that style; otherwise they are
/// created unstyled.
fn push_segment(lines: &mut Vec<Line<'static>>, segment: &str, style: Option<Style>) {
    let mut at_first_piece = true;
    for piece in segment.split('\n') {
        // Every piece after the first follows a newline, so it opens a new row.
        if at_first_piece {
            at_first_piece = false;
        } else {
            lines.push(Line::from(""));
        }
        if piece.is_empty() {
            continue;
        }
        let Some(current) = lines.last_mut() else {
            continue;
        };
        let text = piece.to_string();
        current.spans.push(match style {
            Some(style) => Span::styled(text, style),
            None => text.into(),
        });
    }
}
/// Builds the unstyled fallback rendering of `script`: one `Line` per row,
/// split on `\n`, with no highlighting applied.
fn plain_lines(script: &str) -> Vec<Line<'static>> {
    let mut lines: Vec<Line<'static>> = vec![Line::from("")];
    push_segment(&mut lines, script, None);
    lines
}

/// Convert a bash script into per-line styled content using tree-sitter's
/// bash highlight query. The highlighter is streamed so multi-line content is
/// split into `Line`s while preserving style boundaries.
///
/// The result always contains at least one `Line` (an empty script yields a
/// single empty line). If the highlighter cannot be started, reports an error
/// mid-stream, or yields a byte range that is not a valid slice of `script`,
/// the whole script is returned unstyled, still split into one `Line` per row.
pub(crate) fn highlight_bash_to_lines(script: &str) -> Vec<Line<'static>> {
    let mut highlighter = Highlighter::new();
    let Ok(events) =
        highlighter.highlight(highlight_config(), script.as_bytes(), None, |_| None)
    else {
        return plain_lines(script);
    };

    let mut lines: Vec<Line<'static>> = vec![Line::from("")];
    // Innermost active highlight is on top; it decides the style of source text.
    let mut highlight_stack: Vec<Highlight> = Vec::new();
    for event in events {
        match event {
            Ok(HighlightEvent::HighlightStart(highlight)) => highlight_stack.push(highlight),
            Ok(HighlightEvent::HighlightEnd) => {
                highlight_stack.pop();
            }
            Ok(HighlightEvent::Source { start, end }) => {
                if start == end {
                    continue;
                }
                // Checked slice: fall back to plain text rather than panic on
                // an out-of-range or non-char-boundary offset.
                let Some(segment) = script.get(start..end) else {
                    return plain_lines(script);
                };
                let style = highlight_stack.last().map(|h| highlight_for(*h).style());
                push_segment(&mut lines, segment, style);
            }
            // Discard partial output so the script is never shown half-styled.
            Err(_) => return plain_lines(script),
        }
    }
    lines
}
#[cfg(test)]
@@ -84,62 +149,88 @@ mod tests {
use pretty_assertions::assert_eq;
use ratatui::style::Modifier;
/// Rebuilds the plain text of `lines`: span contents are concatenated within
/// each line and the lines are joined with `\n`.
fn reconstructed(lines: &[Line<'static>]) -> String {
    let mut text = String::new();
    for (row, line) in lines.iter().enumerate() {
        if row > 0 {
            text.push('\n');
        }
        for span in &line.spans {
            text.push_str(&span.content);
        }
    }
    text
}
/// Collects the trimmed text of every span carrying the `DIM` modifier,
/// skipping spans that are empty after trimming.
fn dimmed_tokens(lines: &[Line<'static>]) -> Vec<String> {
    let mut tokens = Vec::new();
    for line in lines {
        for span in &line.spans {
            if !span.style.add_modifier.contains(Modifier::DIM) {
                continue;
            }
            let trimmed = span.content.trim();
            if !trimmed.is_empty() {
                tokens.push(trimmed.to_string());
            }
        }
    }
    tokens
}
#[test]
fn dims_expected_bash_operators() {
let s = "echo foo && bar || baz | qux & (echo hi)";
let lines = highlight_bash_to_lines(s);
let reconstructed: String = lines
.iter()
.map(|l| {
l.spans
.iter()
.map(|sp| sp.content.clone())
.collect::<String>()
})
.collect::<Vec<_>>()
.join("\n");
assert_eq!(reconstructed, s);
assert_eq!(reconstructed(&lines), s);
fn is_dim(span: &Span<'_>) -> bool {
span.style.add_modifier.contains(Modifier::DIM)
}
let dimmed: Vec<String> = lines
.iter()
.flat_map(|l| l.spans.iter())
.filter(|sp| is_dim(sp))
.map(|sp| sp.content.clone().into_owned())
.collect();
assert_eq!(dimmed, vec!["&&", "||", "|", "&", "(", ")"]);
let dimmed = dimmed_tokens(&lines);
assert!(dimmed.contains(&"&&".to_string()));
assert!(dimmed.contains(&"|".to_string()));
assert!(!dimmed.contains(&"echo".to_string()));
}
#[test]
fn does_not_dim_quotes_but_dims_other_punct() {
fn dims_redirects_and_strings() {
let s = "echo \"hi\" > out.txt; echo 'ok'";
let lines = highlight_bash_to_lines(s);
let reconstructed: String = lines
.iter()
.map(|l| {
l.spans
.iter()
.map(|sp| sp.content.clone())
.collect::<String>()
})
.collect::<Vec<_>>()
.join("\n");
assert_eq!(reconstructed, s);
assert_eq!(reconstructed(&lines), s);
fn is_dim(span: &Span<'_>) -> bool {
span.style.add_modifier.contains(Modifier::DIM)
}
let dimmed: Vec<String> = lines
.iter()
.flat_map(|l| l.spans.iter())
.filter(|sp| is_dim(sp))
.map(|sp| sp.content.clone().into_owned())
.collect();
let dimmed = dimmed_tokens(&lines);
assert!(dimmed.contains(&">".to_string()));
assert!(dimmed.contains(&";".to_string()));
assert!(!dimmed.contains(&"\"".to_string()));
assert!(!dimmed.contains(&"'".to_string()));
assert!(dimmed.contains(&"\"hi\"".to_string()));
assert!(dimmed.contains(&"'ok'".to_string()));
}
#[test]
fn highlights_command_and_strings() {
    let s = "echo \"hi\"";
    let lines = highlight_bash_to_lines(s);

    // Style of the last span on the first line whose text equals `needle`.
    let style_of = |needle: &str| {
        lines[0]
            .spans
            .iter()
            .rev()
            .find(|span| span.content.as_ref() == needle)
            .map(|span| span.style)
    };

    let echo_style = style_of("echo").expect("echo span missing");
    let string_style = style_of("\"hi\"").expect("string span missing");

    // The command name is left unstyled; the quoted string is dimmed.
    assert!(echo_style.fg.is_none());
    assert!(!echo_style.add_modifier.contains(Modifier::DIM));
    assert!(string_style.add_modifier.contains(Modifier::DIM));
}
#[test]
fn highlights_heredoc_body_as_string() {
    let s = "cat <<EOF\nheredoc body\nEOF";
    let lines = highlight_bash_to_lines(s);

    // The heredoc body sits on the second line of the script.
    let body_style = lines[1]
        .spans
        .iter()
        .rev()
        .find(|span| span.content.as_ref() == "heredoc body")
        .map(|span| span.style)
        .expect("missing heredoc span");

    assert!(body_style.add_modifier.contains(Modifier::DIM));
}
}