As stated in `codex-rs/README.md`: Today, Codex CLI is written in TypeScript and requires Node.js 22+ to run it. For a number of users, this runtime requirement inhibits adoption: they would be better served by a standalone executable. As maintainers, we want Codex to run efficiently in a wide range of environments with minimal overhead. We also want to take advantage of operating system-specific APIs to provide better sandboxing, where possible. To that end, we are moving forward with a Rust implementation of Codex CLI contained in this folder, which has the following benefits: - The CLI compiles to small, standalone, platform-specific binaries. - Can make direct, native calls to [seccomp](https://man7.org/linux/man-pages/man2/seccomp.2.html) and [landlock](https://man7.org/linux/man-pages/man7/landlock.7.html) in order to support sandboxing on Linux. - No runtime garbage collection, resulting in lower memory consumption and better, more predictable performance. Currently, the Rust implementation is materially behind the TypeScript implementation in functionality, so continue to use the TypeScript implementation for the time being. We will publish native executables via GitHub Releases as soon as we feel the Rust version is usable.
333 lines
11 KiB
Rust
333 lines
11 KiB
Rust
use tree_sitter::Parser;
|
||
use tree_sitter::Tree;
|
||
use tree_sitter_bash::LANGUAGE as BASH;
|
||
|
||
pub fn is_known_safe_command(command: &[String]) -> bool {
|
||
if is_safe_to_call_with_exec(command) {
|
||
return true;
|
||
}
|
||
|
||
// TODO(mbolin): Also support safe commands that are piped together such
|
||
// as `cat foo | wc -l`.
|
||
matches!(
|
||
command,
|
||
[bash, flag, script]
|
||
if bash == "bash"
|
||
&& flag == "-lc"
|
||
&& try_parse_bash(script).and_then(|tree|
|
||
try_parse_single_word_only_command(&tree, script)).is_some_and(|parsed_bash_command| is_safe_to_call_with_exec(&parsed_bash_command))
|
||
)
|
||
}
|
||
|
||
fn is_safe_to_call_with_exec(command: &[String]) -> bool {
|
||
let cmd0 = command.first().map(String::as_str);
|
||
|
||
match cmd0 {
|
||
Some(
|
||
"cat" | "cd" | "echo" | "grep" | "head" | "ls" | "pwd" | "rg" | "tail" | "wc" | "which",
|
||
) => true,
|
||
|
||
Some("find") => {
|
||
// Certain options to `find` can delete files, write to files, or
|
||
// execute arbitrary commands, so we cannot auto-approve the
|
||
// invocation of `find` in such cases.
|
||
#[rustfmt::skip]
|
||
const UNSAFE_FIND_OPTIONS: &[&str] = &[
|
||
// Options that can execute arbitrary commands.
|
||
"-exec", "-execdir", "-ok", "-okdir",
|
||
// Option that deletes matching files.
|
||
"-delete",
|
||
// Options that write pathnames to a file.
|
||
"-fls", "-fprint", "-fprint0", "-fprintf",
|
||
];
|
||
|
||
!command
|
||
.iter()
|
||
.any(|arg| UNSAFE_FIND_OPTIONS.contains(&arg.as_str()))
|
||
}
|
||
|
||
// Git
|
||
Some("git") => matches!(
|
||
command.get(1).map(String::as_str),
|
||
Some("branch" | "status" | "log" | "diff" | "show")
|
||
),
|
||
|
||
// Rust
|
||
Some("cargo") if command.get(1).map(String::as_str) == Some("check") => true,
|
||
|
||
// Special-case `sed -n {N|M,N}p FILE`
|
||
Some("sed")
|
||
if {
|
||
command.len() == 4
|
||
&& command.get(1).map(String::as_str) == Some("-n")
|
||
&& is_valid_sed_n_arg(command.get(2).map(String::as_str))
|
||
&& command.get(3).map(String::is_empty) == Some(false)
|
||
} =>
|
||
{
|
||
true
|
||
}
|
||
|
||
// ── anything else ─────────────────────────────────────────────────
|
||
_ => false,
|
||
}
|
||
}
|
||
|
||
fn try_parse_bash(bash_lc_arg: &str) -> Option<Tree> {
|
||
let lang = BASH.into();
|
||
let mut parser = Parser::new();
|
||
parser.set_language(&lang).expect("load bash grammar");
|
||
|
||
let old_tree: Option<&Tree> = None;
|
||
parser.parse(bash_lc_arg, old_tree)
|
||
}
|
||
|
||
/// If `tree` represents a single Bash command whose name and every argument is
|
||
/// an ordinary `word`, return those words in order; otherwise, return `None`.
|
||
///
|
||
/// `src` must be the exact source string that was parsed into `tree`, so we can
|
||
/// extract the text for every node.
|
||
pub fn try_parse_single_word_only_command(tree: &Tree, src: &str) -> Option<Vec<String>> {
|
||
// Any parse error is an immediate rejection.
|
||
if tree.root_node().has_error() {
|
||
return None;
|
||
}
|
||
|
||
// (program …) with exactly one statement
|
||
let root = tree.root_node();
|
||
if root.kind() != "program" || root.named_child_count() != 1 {
|
||
return None;
|
||
}
|
||
|
||
let cmd = root.named_child(0)?; // (command …)
|
||
if cmd.kind() != "command" {
|
||
return None;
|
||
}
|
||
|
||
let mut words = Vec::new();
|
||
let mut cursor = cmd.walk();
|
||
|
||
for child in cmd.named_children(&mut cursor) {
|
||
match child.kind() {
|
||
// The command name node wraps one `word` child.
|
||
"command_name" => {
|
||
let word_node = child.named_child(0)?; // make sure it's only a word
|
||
if word_node.kind() != "word" {
|
||
return None;
|
||
}
|
||
words.push(word_node.utf8_text(src.as_bytes()).ok()?.to_owned());
|
||
}
|
||
// Positional‑argument word (allowed).
|
||
"word" | "number" => {
|
||
words.push(child.utf8_text(src.as_bytes()).ok()?.to_owned());
|
||
}
|
||
"string" => {
|
||
if child.child_count() == 3
|
||
&& child.child(0)?.kind() == "\""
|
||
&& child.child(1)?.kind() == "string_content"
|
||
&& child.child(2)?.kind() == "\""
|
||
{
|
||
words.push(child.child(1)?.utf8_text(src.as_bytes()).ok()?.to_owned());
|
||
} else {
|
||
// Anything else means the command is *not* plain words.
|
||
return None;
|
||
}
|
||
}
|
||
"concatenation" => {
|
||
// TODO: Consider things like `'ab\'a'`.
|
||
return None;
|
||
}
|
||
"raw_string" => {
|
||
// Raw string is a single word, but we need to strip the quotes.
|
||
let raw_string = child.utf8_text(src.as_bytes()).ok()?;
|
||
let stripped = raw_string
|
||
.strip_prefix('\'')
|
||
.and_then(|s| s.strip_suffix('\''));
|
||
if let Some(stripped) = stripped {
|
||
words.push(stripped.to_owned());
|
||
} else {
|
||
return None;
|
||
}
|
||
}
|
||
// Anything else means the command is *not* plain words.
|
||
_ => return None,
|
||
}
|
||
}
|
||
|
||
Some(words)
|
||
}
|
||
|
||
/* ----------------------------------------------------------
|
||
Example
|
||
---------------------------------------------------------- */
|
||
|
||
/// Returns true if `arg` matches /^(\d+,)?\d+p$/
///
/// That is: a line number or an `M,N` line range made of ASCII digits,
/// followed by the `p` (print) command. `None` is never valid.
fn is_valid_sed_n_arg(arg: Option<&str>) -> bool {
    /// A non-empty run of ASCII digits.
    fn is_line_number(text: &str) -> bool {
        !text.is_empty() && text.bytes().all(|b| b.is_ascii_digit())
    }

    // The argument must be present and end with the `p` command.
    let Some(address) = arg.and_then(|s| s.strip_suffix('p')) else {
        return false;
    };

    match address.split_once(',') {
        // Range form `M,N`; a second comma would land in `last` and fail the
        // digit check.
        Some((first, last)) => is_line_number(first) && is_line_number(last),
        // Single line number.
        None => is_line_number(address),
    }
}
|
||
#[cfg(test)]
mod tests {
    use super::*;

    /// Converts string literals into the owned argv form the checkers take.
    fn vec_str(args: &[&str]) -> Vec<String> {
        args.iter().copied().map(String::from).collect()
    }

    /// Runs the direct-exec allowlist check on `args`.
    fn exec_is_safe(args: &[&str]) -> bool {
        is_safe_to_call_with_exec(&vec_str(args))
    }

    /// Runs the top-level check on `bash -lc <script>`.
    fn bash_lc_is_safe(script: &str) -> bool {
        is_known_safe_command(&vec_str(&["bash", "-lc", script]))
    }

    /// Parses `script` and extracts its plain words, panicking if either the
    /// parse or the extraction yields `None`.
    fn parse_words(script: &str) -> Vec<String> {
        try_parse_bash(script)
            .and_then(|tree| try_parse_single_word_only_command(&tree, script))
            .unwrap()
    }

    #[test]
    fn known_safe_examples() {
        assert!(exec_is_safe(&["ls"]));
        assert!(exec_is_safe(&["git", "status"]));
        assert!(exec_is_safe(&["sed", "-n", "1,5p", "file.txt"]));

        // Safe `find` command (no unsafe options).
        assert!(exec_is_safe(&["find", ".", "-name", "file.txt"]));
    }

    #[test]
    fn unknown_or_partial() {
        assert!(!exec_is_safe(&["foo"]));
        assert!(!exec_is_safe(&["git", "fetch"]));
        assert!(!exec_is_safe(&["sed", "-n", "xp", "file.txt"]));

        // Unsafe `find` commands.
        let unsafe_find_invocations: [&[&str]; 9] = [
            &["find", ".", "-name", "file.txt", "-exec", "rm", "{}", ";"],
            &["find", ".", "-name", "*.py", "-execdir", "python3", "{}", ";"],
            &["find", ".", "-name", "file.txt", "-ok", "rm", "{}", ";"],
            &["find", ".", "-name", "*.py", "-okdir", "python3", "{}", ";"],
            &["find", ".", "-delete", "-name", "file.txt"],
            &["find", ".", "-fls", "/etc/passwd"],
            &["find", ".", "-fprint", "/etc/passwd"],
            &["find", ".", "-fprint0", "/etc/passwd"],
            &["find", ".", "-fprintf", "/root/suid.txt", "%#m %u %p\n"],
        ];
        for args in unsafe_find_invocations.map(vec_str) {
            assert!(
                !is_safe_to_call_with_exec(&args),
                "expected {:?} to be unsafe",
                args
            );
        }
    }

    #[test]
    fn bash_lc_safe_examples() {
        assert!(bash_lc_is_safe("ls"));
        assert!(bash_lc_is_safe("ls -1"));
        assert!(bash_lc_is_safe("git status"));
        assert!(bash_lc_is_safe("grep -R \"Cargo.toml\" -n"));
        assert!(bash_lc_is_safe("sed -n 1,5p file.txt"));
        assert!(bash_lc_is_safe("sed -n '1,5p' file.txt"));

        assert!(bash_lc_is_safe("find . -name file.txt"));
    }

    #[test]
    fn bash_lc_unsafe_examples() {
        // Here the script is split across two argv entries, so the helper that
        // builds a three-element argv cannot be used.
        assert!(
            !is_known_safe_command(&vec_str(&["bash", "-lc", "git", "status"])),
            "Four arg version is not known to be safe."
        );
        assert!(
            !bash_lc_is_safe("'git status'"),
            "The extra quoting around 'git status' makes it a program named 'git status' and is therefore unsafe."
        );

        assert!(
            !bash_lc_is_safe("find . -name file.txt -delete"),
            "Unsafe find option should not be auto‑approved."
        );
    }

    #[test]
    fn test_try_parse_single_word_only_command() {
        // Ensure the single quotes are properly removed.
        assert_eq!(
            vec_str(&["sed", "-n", "1,5p", "file.txt"]),
            parse_words("sed -n '1,5p' file.txt"),
        );

        // A numeric-looking argument is accepted as a word.
        assert_eq!(vec_str(&["ls", "-1"]), parse_words("ls -1"));

        // A double-quoted string with no funny stuff loses only its quotes.
        assert_eq!(
            vec_str(&["grep", "-R", "Cargo.toml", "-n"]),
            parse_words("grep -R \"Cargo.toml\" -n"),
        );
    }
}
|