Colocate more of bash parsing (#6489)

Move a few callsites that were detecting `bash -lc` into a shared
helper.
This commit is contained in:
pakrym-oai
2025-11-10 18:38:36 -08:00
committed by GitHub
parent 6c36318bd8
commit bb7b0213a8
3 changed files with 57 additions and 29 deletions

View File

@@ -88,17 +88,33 @@ pub fn try_parse_word_only_commands_sequence(tree: &Tree, src: &str) -> Option<V
Some(commands)
}
pub fn is_well_known_sh_shell(shell: &str) -> bool {
if shell == "bash" || shell == "zsh" {
return true;
}
let shell_name = std::path::Path::new(shell)
.file_name()
.and_then(|s| s.to_str())
.unwrap_or(shell);
matches!(shell_name, "bash" | "zsh")
}
pub fn extract_bash_command(command: &[String]) -> Option<(&str, &str)> {
let [shell, flag, script] = command else {
return None;
};
if flag != "-lc" || !is_well_known_sh_shell(shell) {
return None;
}
Some((shell, script))
}
/// Returns the sequence of plain commands within a `bash -lc "..."` or
/// `zsh -lc "..."` invocation when the script only contains word-only commands
/// joined by safe operators.
pub fn parse_shell_lc_plain_commands(command: &[String]) -> Option<Vec<Vec<String>>> {
let [shell, flag, script] = command else {
return None;
};
if flag != "-lc" || !(shell == "bash" || shell == "zsh") {
return None;
}
let (_, script) = extract_bash_command(command)?;
let tree = try_parse_shell(script)?;
try_parse_word_only_commands_sequence(&tree, script)

View File

@@ -1,3 +1,4 @@
use crate::bash::extract_bash_command;
use crate::bash::try_parse_shell;
use crate::bash::try_parse_word_only_commands_sequence;
use codex_protocol::parse_command::ParsedCommand;
@@ -853,6 +854,29 @@ mod tests {
}],
);
}
#[test]
fn bin_bash_lc_sed() {
assert_parsed(
&shlex_split_safe("/bin/bash -lc 'sed -n '1,10p' Cargo.toml'"),
vec![ParsedCommand::Read {
cmd: "sed -n '1,10p' Cargo.toml".to_string(),
name: "Cargo.toml".to_string(),
path: PathBuf::from("Cargo.toml"),
}],
);
}
#[test]
fn bin_zsh_lc_sed() {
assert_parsed(
&shlex_split_safe("/bin/zsh -lc 'sed -n '1,10p' Cargo.toml'"),
vec![ParsedCommand::Read {
cmd: "sed -n '1,10p' Cargo.toml".to_string(),
name: "Cargo.toml".to_string(),
path: PathBuf::from("Cargo.toml"),
}],
);
}
}
pub fn parse_command_impl(command: &[String]) -> Vec<ParsedCommand> {
@@ -1166,18 +1190,13 @@ fn parse_find_query_and_path(tail: &[String]) -> (Option<String>, Option<String>
}
fn parse_shell_lc_commands(original: &[String]) -> Option<Vec<ParsedCommand>> {
let [shell, flag, script] = original else {
return None;
};
if flag != "-lc" || !(shell == "bash" || shell == "zsh") {
return None;
}
let (_, script) = extract_bash_command(original)?;
if let Some(tree) = try_parse_shell(script)
&& let Some(all_commands) = try_parse_word_only_commands_sequence(&tree, script)
&& !all_commands.is_empty()
{
let script_tokens = shlex_split(script)
.unwrap_or_else(|| vec![shell.clone(), flag.clone(), script.clone()]);
let script_tokens = shlex_split(script).unwrap_or_else(|| vec![script.to_string()]);
// Strip small formatting helpers (e.g., head/tail/awk/wc/etc) so we
// bias toward the primary command when pipelines are present.
// First, drop obvious small formatting helpers (e.g., wc/awk/etc).
@@ -1186,7 +1205,7 @@ fn parse_shell_lc_commands(original: &[String]) -> Option<Vec<ParsedCommand>> {
let filtered_commands = drop_small_formatting_commands(all_commands);
if filtered_commands.is_empty() {
return Some(vec![ParsedCommand::Unknown {
cmd: script.clone(),
cmd: script.to_string(),
}]);
}
// Build parsed commands, tracking `cd` segments to compute effective file paths.
@@ -1250,7 +1269,7 @@ fn parse_shell_lc_commands(original: &[String]) -> Option<Vec<ParsedCommand>> {
});
if has_pipe && has_sed_n {
ParsedCommand::Read {
cmd: script.clone(),
cmd: script.to_string(),
name,
path,
}
@@ -1295,7 +1314,7 @@ fn parse_shell_lc_commands(original: &[String]) -> Option<Vec<ParsedCommand>> {
return Some(commands);
}
Some(vec![ParsedCommand::Unknown {
cmd: script.clone(),
cmd: script.to_string(),
}])
}

View File

@@ -1,6 +1,7 @@
use std::path::Path;
use std::path::PathBuf;
use codex_core::bash::extract_bash_command;
use dirs::home_dir;
use shlex::try_join;
@@ -8,19 +9,11 @@ pub(crate) fn escape_command(command: &[String]) -> String {
try_join(command.iter().map(String::as_str)).unwrap_or_else(|_| command.join(" "))
}
fn is_login_shell_with_lc(shell: &str) -> bool {
let shell_name = std::path::Path::new(shell)
.file_name()
.and_then(|s| s.to_str())
.unwrap_or(shell);
matches!(shell_name, "bash" | "zsh")
}
pub(crate) fn strip_bash_lc_and_escape(command: &[String]) -> String {
match command {
[first, second, third] if is_login_shell_with_lc(first) && second == "-lc" => third.clone(),
_ => escape_command(command),
if let Some((_, script)) = extract_bash_command(command) {
return script.to_string();
}
escape_command(command)
}
/// If `path` is absolute and inside $HOME, return the part *after* the home