Treat zsh -lc like bash -lc (#5411)

Without proper `zsh -lc` parsing, we lose behavior that we expect from raw or `bash -lc` commands, such as proper command parsing, turn diff tracking, and safe command checks.
2025-10-20 15:52:25 -07:00
parent 740b4a95f4
commit 32d50bda94
4 changed files with 60 additions and 24 deletions
--- a/codex-rs/core/src/bash.rs
+++ b/codex-rs/core/src/bash.rs
@@ -5,13 +5,13 @@ use tree_sitter_bash::LANGUAGE as BASH;

 /// Parse the provided bash source using tree-sitter-bash, returning a Tree on
 /// success or None if parsing failed.
-pub fn try_parse_bash(bash_lc_arg: &str) -> Option<Tree> {
+pub fn try_parse_shell(shell_lc_arg: &str) -> Option<Tree> {
    let lang = BASH.into();
    let mut parser = Parser::new();
    #[expect(clippy::expect_used)]
    parser.set_language(&lang).expect("load bash grammar");
    let old_tree: Option<&Tree> = None;
-    parser.parse(bash_lc_arg, old_tree)
+    parser.parse(shell_lc_arg, old_tree)
 }

 /// Parse a script which may contain multiple simple commands joined only by
@@ -88,18 +88,19 @@ pub fn try_parse_word_only_commands_sequence(tree: &Tree, src: &str) -> Option<V
    Some(commands)
 }

-/// Returns the sequence of plain commands within a `bash -lc "..."` invocation
-/// when the script only contains word-only commands joined by safe operators.
-pub fn parse_bash_lc_plain_commands(command: &[String]) -> Option<Vec<Vec<String>>> {
-    let [bash, flag, script] = command else {
+/// Returns the sequence of plain commands within a `bash -lc "..."` or
+/// `zsh -lc "..."` invocation when the script only contains word-only commands
+/// joined by safe operators.
+pub fn parse_shell_lc_plain_commands(command: &[String]) -> Option<Vec<Vec<String>>> {
+    let [shell, flag, script] = command else {
        return None;
    };

-    if bash != "bash" || flag != "-lc" {
+    if flag != "-lc" || !(shell == "bash" || shell == "zsh") {
        return None;
    }

-    let tree = try_parse_bash(script)?;
+    let tree = try_parse_shell(script)?;
    try_parse_word_only_commands_sequence(&tree, script)
 }

@@ -154,7 +155,7 @@ mod tests {
    use super::*;

    fn parse_seq(src: &str) -> Option<Vec<Vec<String>>> {
-        let tree = try_parse_bash(src)?;
+        let tree = try_parse_shell(src)?;
        try_parse_word_only_commands_sequence(&tree, src)
    }

@@ -234,4 +235,11 @@ mod tests {
    fn rejects_trailing_operator_parse_error() {
        assert!(parse_seq("ls &&").is_none());
    }
+
+    #[test]
+    fn parse_zsh_lc_plain_commands() {
+        let command = vec!["zsh".to_string(), "-lc".to_string(), "ls".to_string()];
+        let parsed = parse_shell_lc_plain_commands(&command).unwrap();
+        assert_eq!(parsed, vec![vec!["ls".to_string()]]);
+    }
 }
--- a/codex-rs/core/src/command_safety/is_dangerous_command.rs
+++ b/codex-rs/core/src/command_safety/is_dangerous_command.rs
@@ -1,4 +1,4 @@
-use crate::bash::parse_bash_lc_plain_commands;
+use crate::bash::parse_shell_lc_plain_commands;

 pub fn command_might_be_dangerous(command: &[String]) -> bool {
    if is_dangerous_to_call_with_exec(command) {
@@ -6,7 +6,7 @@ pub fn command_might_be_dangerous(command: &[String]) -> bool {
    }

    // Support `bash -lc "<script>"` where any part of the script might contain a dangerous command.
-    if let Some(all_commands) = parse_bash_lc_plain_commands(command)
+    if let Some(all_commands) = parse_shell_lc_plain_commands(command)
        && all_commands
            .iter()
            .any(|cmd| is_dangerous_to_call_with_exec(cmd))
@@ -57,6 +57,15 @@ mod tests {
        ])));
    }

+    #[test]
+    fn zsh_git_reset_is_dangerous() {
+        assert!(command_might_be_dangerous(&vec_str(&[
+            "zsh",
+            "-lc",
+            "git reset --hard"
+        ])));
+    }
+
    #[test]
    fn git_status_is_not_dangerous() {
        assert!(!command_might_be_dangerous(&vec_str(&["git", "status"])));
--- a/codex-rs/core/src/command_safety/is_safe_command.rs
+++ b/codex-rs/core/src/command_safety/is_safe_command.rs
@@ -1,4 +1,4 @@
-use crate::bash::parse_bash_lc_plain_commands;
+use crate::bash::parse_shell_lc_plain_commands;

 pub fn is_known_safe_command(command: &[String]) -> bool {
    let command: Vec<String> = command
@@ -29,7 +29,7 @@ pub fn is_known_safe_command(command: &[String]) -> bool {
    // introduce side effects ( "&&", "||", ";", and "|" ). If every
    // individual command in the script is itself a known‑safe command, then
    // the composite expression is considered safe.
-    if let Some(all_commands) = parse_bash_lc_plain_commands(&command)
+    if let Some(all_commands) = parse_shell_lc_plain_commands(&command)
        && !all_commands.is_empty()
        && all_commands
            .iter()
@@ -201,6 +201,11 @@ mod tests {
        ])));
    }

+    #[test]
+    fn zsh_lc_safe_command_sequence() {
+        assert!(is_known_safe_command(&vec_str(&["zsh", "-lc", "ls"])));
+    }
+
    #[test]
    fn unknown_or_partial() {
        assert!(!is_safe_to_call_with_exec(&vec_str(&["foo"])));
--- a/codex-rs/core/src/parse_command.rs
+++ b/codex-rs/core/src/parse_command.rs
@@ -1,4 +1,4 @@
-use crate::bash::try_parse_bash;
+use crate::bash::try_parse_shell;
 use crate::bash::try_parse_word_only_commands_sequence;
 use codex_protocol::parse_command::ParsedCommand;
 use shlex::split as shlex_split;
@@ -193,6 +193,19 @@ mod tests {
        );
    }

+    #[test]
+    fn zsh_lc_supports_cat() {
+        let inner = "cat README.md";
+        assert_parsed(
+            &vec_str(&["zsh", "-lc", inner]),
+            vec![ParsedCommand::Read {
+                cmd: inner.to_string(),
+                name: "README.md".to_string(),
+                path: PathBuf::from("README.md"),
+            }],
+        );
+    }
+
    #[test]
    fn cd_then_cat_is_single_read() {
        assert_parsed(
@@ -843,7 +856,7 @@ mod tests {
 }

 pub fn parse_command_impl(command: &[String]) -> Vec<ParsedCommand> {
-    if let Some(commands) = parse_bash_lc_commands(command) {
+    if let Some(commands) = parse_shell_lc_commands(command) {
        return commands;
    }

@@ -981,7 +994,7 @@ fn is_valid_sed_n_arg(arg: Option<&str>) -> bool {
 }

 /// Normalize a command by:
-/// - Removing `yes`/`no`/`bash -c`/`bash -lc` prefixes.
+/// - Removing `yes`/`no`/`bash -c`/`bash -lc`/`zsh -c`/`zsh -lc` prefixes.
 /// - Splitting on `|` and `&&`/`||`/`;`
 fn normalize_tokens(cmd: &[String]) -> Vec<String> {
    match cmd {
@@ -993,9 +1006,10 @@ fn normalize_tokens(cmd: &[String]) -> Vec<String> {
            // Do not re-shlex already-tokenized input; just drop the prefix.
            rest.to_vec()
        }
-        [bash, flag, script] if bash == "bash" && (flag == "-c" || flag == "-lc") => {
-            shlex_split(script)
-                .unwrap_or_else(|| vec!["bash".to_string(), flag.clone(), script.clone()])
+        [shell, flag, script]
+            if (shell == "bash" || shell == "zsh") && (flag == "-c" || flag == "-lc") =>
+        {
+            shlex_split(script).unwrap_or_else(|| vec![shell.clone(), flag.clone(), script.clone()])
        }
        _ => cmd.to_vec(),
    }
@@ -1151,19 +1165,19 @@ fn parse_find_query_and_path(tail: &[String]) -> (Option<String>, Option<String>
    (query, path)
 }

-fn parse_bash_lc_commands(original: &[String]) -> Option<Vec<ParsedCommand>> {
-    let [bash, flag, script] = original else {
+fn parse_shell_lc_commands(original: &[String]) -> Option<Vec<ParsedCommand>> {
+    let [shell, flag, script] = original else {
        return None;
    };
-    if bash != "bash" || flag != "-lc" {
+    if flag != "-lc" || !(shell == "bash" || shell == "zsh") {
        return None;
    }
-    if let Some(tree) = try_parse_bash(script)
+    if let Some(tree) = try_parse_shell(script)
        && let Some(all_commands) = try_parse_word_only_commands_sequence(&tree, script)
        && !all_commands.is_empty()
    {
        let script_tokens = shlex_split(script)
-            .unwrap_or_else(|| vec!["bash".to_string(), flag.clone(), script.clone()]);
+            .unwrap_or_else(|| vec![shell.clone(), flag.clone(), script.clone()]);
        // Strip small formatting helpers (e.g., head/tail/awk/wc/etc) so we
        // bias toward the primary command when pipelines are present.
        // First, drop obvious small formatting helpers (e.g., wc/awk/etc).