Treat zsh -lc like bash -lc (#5411)
Without proper `zsh -lc` parsing, we lose features such as command parsing, turn diff tracking, safe-command checks, and other behavior we expect from raw or `bash -lc` commands.
This commit is contained in:
@@ -5,13 +5,13 @@ use tree_sitter_bash::LANGUAGE as BASH;
|
|||||||
|
|
||||||
/// Parse the provided bash source using tree-sitter-bash, returning a Tree on
|
/// Parse the provided bash source using tree-sitter-bash, returning a Tree on
|
||||||
/// success or None if parsing failed.
|
/// success or None if parsing failed.
|
||||||
pub fn try_parse_bash(bash_lc_arg: &str) -> Option<Tree> {
|
pub fn try_parse_shell(shell_lc_arg: &str) -> Option<Tree> {
|
||||||
let lang = BASH.into();
|
let lang = BASH.into();
|
||||||
let mut parser = Parser::new();
|
let mut parser = Parser::new();
|
||||||
#[expect(clippy::expect_used)]
|
#[expect(clippy::expect_used)]
|
||||||
parser.set_language(&lang).expect("load bash grammar");
|
parser.set_language(&lang).expect("load bash grammar");
|
||||||
let old_tree: Option<&Tree> = None;
|
let old_tree: Option<&Tree> = None;
|
||||||
parser.parse(bash_lc_arg, old_tree)
|
parser.parse(shell_lc_arg, old_tree)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse a script which may contain multiple simple commands joined only by
|
/// Parse a script which may contain multiple simple commands joined only by
|
||||||
@@ -88,18 +88,19 @@ pub fn try_parse_word_only_commands_sequence(tree: &Tree, src: &str) -> Option<V
|
|||||||
Some(commands)
|
Some(commands)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the sequence of plain commands within a `bash -lc "..."` invocation
|
/// Returns the sequence of plain commands within a `bash -lc "..."` or
|
||||||
/// when the script only contains word-only commands joined by safe operators.
|
/// `zsh -lc "..."` invocation when the script only contains word-only commands
|
||||||
pub fn parse_bash_lc_plain_commands(command: &[String]) -> Option<Vec<Vec<String>>> {
|
/// joined by safe operators.
|
||||||
let [bash, flag, script] = command else {
|
pub fn parse_shell_lc_plain_commands(command: &[String]) -> Option<Vec<Vec<String>>> {
|
||||||
|
let [shell, flag, script] = command else {
|
||||||
return None;
|
return None;
|
||||||
};
|
};
|
||||||
|
|
||||||
if bash != "bash" || flag != "-lc" {
|
if flag != "-lc" || !(shell == "bash" || shell == "zsh") {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
|
|
||||||
let tree = try_parse_bash(script)?;
|
let tree = try_parse_shell(script)?;
|
||||||
try_parse_word_only_commands_sequence(&tree, script)
|
try_parse_word_only_commands_sequence(&tree, script)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -154,7 +155,7 @@ mod tests {
|
|||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
fn parse_seq(src: &str) -> Option<Vec<Vec<String>>> {
|
fn parse_seq(src: &str) -> Option<Vec<Vec<String>>> {
|
||||||
let tree = try_parse_bash(src)?;
|
let tree = try_parse_shell(src)?;
|
||||||
try_parse_word_only_commands_sequence(&tree, src)
|
try_parse_word_only_commands_sequence(&tree, src)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -234,4 +235,11 @@ mod tests {
|
|||||||
fn rejects_trailing_operator_parse_error() {
|
fn rejects_trailing_operator_parse_error() {
|
||||||
assert!(parse_seq("ls &&").is_none());
|
assert!(parse_seq("ls &&").is_none());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_zsh_lc_plain_commands() {
|
||||||
|
let command = vec!["zsh".to_string(), "-lc".to_string(), "ls".to_string()];
|
||||||
|
let parsed = parse_shell_lc_plain_commands(&command).unwrap();
|
||||||
|
assert_eq!(parsed, vec![vec!["ls".to_string()]]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
use crate::bash::parse_bash_lc_plain_commands;
|
use crate::bash::parse_shell_lc_plain_commands;
|
||||||
|
|
||||||
pub fn command_might_be_dangerous(command: &[String]) -> bool {
|
pub fn command_might_be_dangerous(command: &[String]) -> bool {
|
||||||
if is_dangerous_to_call_with_exec(command) {
|
if is_dangerous_to_call_with_exec(command) {
|
||||||
@@ -6,7 +6,7 @@ pub fn command_might_be_dangerous(command: &[String]) -> bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Support `bash -lc "<script>"` where any part of the script might contain a dangerous command.
|
// Support `bash -lc "<script>"` where any part of the script might contain a dangerous command.
|
||||||
if let Some(all_commands) = parse_bash_lc_plain_commands(command)
|
if let Some(all_commands) = parse_shell_lc_plain_commands(command)
|
||||||
&& all_commands
|
&& all_commands
|
||||||
.iter()
|
.iter()
|
||||||
.any(|cmd| is_dangerous_to_call_with_exec(cmd))
|
.any(|cmd| is_dangerous_to_call_with_exec(cmd))
|
||||||
@@ -57,6 +57,15 @@ mod tests {
|
|||||||
])));
|
])));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn zsh_git_reset_is_dangerous() {
|
||||||
|
assert!(command_might_be_dangerous(&vec_str(&[
|
||||||
|
"zsh",
|
||||||
|
"-lc",
|
||||||
|
"git reset --hard"
|
||||||
|
])));
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn git_status_is_not_dangerous() {
|
fn git_status_is_not_dangerous() {
|
||||||
assert!(!command_might_be_dangerous(&vec_str(&["git", "status"])));
|
assert!(!command_might_be_dangerous(&vec_str(&["git", "status"])));
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
use crate::bash::parse_bash_lc_plain_commands;
|
use crate::bash::parse_shell_lc_plain_commands;
|
||||||
|
|
||||||
pub fn is_known_safe_command(command: &[String]) -> bool {
|
pub fn is_known_safe_command(command: &[String]) -> bool {
|
||||||
let command: Vec<String> = command
|
let command: Vec<String> = command
|
||||||
@@ -29,7 +29,7 @@ pub fn is_known_safe_command(command: &[String]) -> bool {
|
|||||||
// introduce side effects ( "&&", "||", ";", and "|" ). If every
|
// introduce side effects ( "&&", "||", ";", and "|" ). If every
|
||||||
// individual command in the script is itself a known‑safe command, then
|
// individual command in the script is itself a known‑safe command, then
|
||||||
// the composite expression is considered safe.
|
// the composite expression is considered safe.
|
||||||
if let Some(all_commands) = parse_bash_lc_plain_commands(&command)
|
if let Some(all_commands) = parse_shell_lc_plain_commands(&command)
|
||||||
&& !all_commands.is_empty()
|
&& !all_commands.is_empty()
|
||||||
&& all_commands
|
&& all_commands
|
||||||
.iter()
|
.iter()
|
||||||
@@ -201,6 +201,11 @@ mod tests {
|
|||||||
])));
|
])));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn zsh_lc_safe_command_sequence() {
|
||||||
|
assert!(is_known_safe_command(&vec_str(&["zsh", "-lc", "ls"])));
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn unknown_or_partial() {
|
fn unknown_or_partial() {
|
||||||
assert!(!is_safe_to_call_with_exec(&vec_str(&["foo"])));
|
assert!(!is_safe_to_call_with_exec(&vec_str(&["foo"])));
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
use crate::bash::try_parse_bash;
|
use crate::bash::try_parse_shell;
|
||||||
use crate::bash::try_parse_word_only_commands_sequence;
|
use crate::bash::try_parse_word_only_commands_sequence;
|
||||||
use codex_protocol::parse_command::ParsedCommand;
|
use codex_protocol::parse_command::ParsedCommand;
|
||||||
use shlex::split as shlex_split;
|
use shlex::split as shlex_split;
|
||||||
@@ -193,6 +193,19 @@ mod tests {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn zsh_lc_supports_cat() {
|
||||||
|
let inner = "cat README.md";
|
||||||
|
assert_parsed(
|
||||||
|
&vec_str(&["zsh", "-lc", inner]),
|
||||||
|
vec![ParsedCommand::Read {
|
||||||
|
cmd: inner.to_string(),
|
||||||
|
name: "README.md".to_string(),
|
||||||
|
path: PathBuf::from("README.md"),
|
||||||
|
}],
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn cd_then_cat_is_single_read() {
|
fn cd_then_cat_is_single_read() {
|
||||||
assert_parsed(
|
assert_parsed(
|
||||||
@@ -843,7 +856,7 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn parse_command_impl(command: &[String]) -> Vec<ParsedCommand> {
|
pub fn parse_command_impl(command: &[String]) -> Vec<ParsedCommand> {
|
||||||
if let Some(commands) = parse_bash_lc_commands(command) {
|
if let Some(commands) = parse_shell_lc_commands(command) {
|
||||||
return commands;
|
return commands;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -981,7 +994,7 @@ fn is_valid_sed_n_arg(arg: Option<&str>) -> bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Normalize a command by:
|
/// Normalize a command by:
|
||||||
/// - Removing `yes`/`no`/`bash -c`/`bash -lc` prefixes.
|
/// - Removing `yes`/`no`/`bash -c`/`bash -lc`/`zsh -c`/`zsh -lc` prefixes.
|
||||||
/// - Splitting on `|` and `&&`/`||`/`;`
|
/// - Splitting on `|` and `&&`/`||`/`;`
|
||||||
fn normalize_tokens(cmd: &[String]) -> Vec<String> {
|
fn normalize_tokens(cmd: &[String]) -> Vec<String> {
|
||||||
match cmd {
|
match cmd {
|
||||||
@@ -993,9 +1006,10 @@ fn normalize_tokens(cmd: &[String]) -> Vec<String> {
|
|||||||
// Do not re-shlex already-tokenized input; just drop the prefix.
|
// Do not re-shlex already-tokenized input; just drop the prefix.
|
||||||
rest.to_vec()
|
rest.to_vec()
|
||||||
}
|
}
|
||||||
[bash, flag, script] if bash == "bash" && (flag == "-c" || flag == "-lc") => {
|
[shell, flag, script]
|
||||||
shlex_split(script)
|
if (shell == "bash" || shell == "zsh") && (flag == "-c" || flag == "-lc") =>
|
||||||
.unwrap_or_else(|| vec!["bash".to_string(), flag.clone(), script.clone()])
|
{
|
||||||
|
shlex_split(script).unwrap_or_else(|| vec![shell.clone(), flag.clone(), script.clone()])
|
||||||
}
|
}
|
||||||
_ => cmd.to_vec(),
|
_ => cmd.to_vec(),
|
||||||
}
|
}
|
||||||
@@ -1151,19 +1165,19 @@ fn parse_find_query_and_path(tail: &[String]) -> (Option<String>, Option<String>
|
|||||||
(query, path)
|
(query, path)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_bash_lc_commands(original: &[String]) -> Option<Vec<ParsedCommand>> {
|
fn parse_shell_lc_commands(original: &[String]) -> Option<Vec<ParsedCommand>> {
|
||||||
let [bash, flag, script] = original else {
|
let [shell, flag, script] = original else {
|
||||||
return None;
|
return None;
|
||||||
};
|
};
|
||||||
if bash != "bash" || flag != "-lc" {
|
if flag != "-lc" || !(shell == "bash" || shell == "zsh") {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
if let Some(tree) = try_parse_bash(script)
|
if let Some(tree) = try_parse_shell(script)
|
||||||
&& let Some(all_commands) = try_parse_word_only_commands_sequence(&tree, script)
|
&& let Some(all_commands) = try_parse_word_only_commands_sequence(&tree, script)
|
||||||
&& !all_commands.is_empty()
|
&& !all_commands.is_empty()
|
||||||
{
|
{
|
||||||
let script_tokens = shlex_split(script)
|
let script_tokens = shlex_split(script)
|
||||||
.unwrap_or_else(|| vec!["bash".to_string(), flag.clone(), script.clone()]);
|
.unwrap_or_else(|| vec![shell.clone(), flag.clone(), script.clone()]);
|
||||||
// Strip small formatting helpers (e.g., head/tail/awk/wc/etc) so we
|
// Strip small formatting helpers (e.g., head/tail/awk/wc/etc) so we
|
||||||
// bias toward the primary command when pipelines are present.
|
// bias toward the primary command when pipelines are present.
|
||||||
// First, drop obvious small formatting helpers (e.g., wc/awk/etc).
|
// First, drop obvious small formatting helpers (e.g., wc/awk/etc).
|
||||||
|
|||||||
Reference in New Issue
Block a user