feat: introduce codex_execpolicy crate for defining "safe" commands (#634)

As described in detail in `codex-rs/execpolicy/README.md` introduced in this PR, `execpolicy` is a tool that lets you define a set of _patterns_ used to match [`execv(3)`](https://linux.die.net/man/3/execv) invocations. When a pattern is matched, `execpolicy` returns the parsed version in a structured form that is amenable to static analysis. The primary use case is to define patterns that match commands that should be auto-approved by a tool such as Codex. This supports a richer pattern-matching mechanism than the sort of prefix-matching we have done to date, e.g.: 5e40d9d221/codex-cli/src/approvals.ts (L333-L354). Note that we are still playing with the API, and the `system_path` option in particular still needs some work.
2025-04-24 17:14:47 -07:00
parent 5e40d9d221
commit 58f0e5ab74
29 changed files with 3830 additions and 47 deletions
--- a/codex-rs/execpolicy/tests/bad.rs
+++ b/codex-rs/execpolicy/tests/bad.rs
@@ -0,0 +1,9 @@
+use codex_execpolicy::get_default_policy;
+use codex_execpolicy::NegativeExamplePassedCheck;
+
+#[test]
+fn verify_everything_in_bad_list_is_rejected() {
+    let policy = get_default_policy().expect("failed to load default policy");
+    let violations = policy.check_each_bad_list_individually();
+    assert_eq!(Vec::<NegativeExamplePassedCheck>::new(), violations);
+}
--- a/codex-rs/execpolicy/tests/cp.rs
+++ b/codex-rs/execpolicy/tests/cp.rs
@@ -0,0 +1,85 @@
+extern crate codex_execpolicy;
+
+use codex_execpolicy::get_default_policy;
+use codex_execpolicy::ArgMatcher;
+use codex_execpolicy::ArgType;
+use codex_execpolicy::Error;
+use codex_execpolicy::ExecCall;
+use codex_execpolicy::MatchedArg;
+use codex_execpolicy::MatchedExec;
+use codex_execpolicy::Policy;
+use codex_execpolicy::Result;
+use codex_execpolicy::ValidExec;
+
+fn setup() -> Policy {
+    get_default_policy().expect("failed to load default policy")
+}
+
+#[test]
+fn test_cp_no_args() {
+    let policy = setup();
+    let cp = ExecCall::new("cp", &[]);
+    assert_eq!(
+        Err(Error::NotEnoughArgs {
+            program: "cp".to_string(),
+            args: vec![],
+            arg_patterns: vec![ArgMatcher::ReadableFiles, ArgMatcher::WriteableFile]
+        }),
+        policy.check(&cp)
+    )
+}
+
+#[test]
+fn test_cp_one_arg() {
+    let policy = setup();
+    let cp = ExecCall::new("cp", &["foo/bar"]);
+
+    assert_eq!(
+        Err(Error::VarargMatcherDidNotMatchAnything {
+            program: "cp".to_string(),
+            matcher: ArgMatcher::ReadableFiles,
+        }),
+        policy.check(&cp)
+    );
+}
+
+#[test]
+fn test_cp_one_file() -> Result<()> {
+    let policy = setup();
+    let cp = ExecCall::new("cp", &["foo/bar", "../baz"]);
+    assert_eq!(
+        Ok(MatchedExec::Match {
+            exec: ValidExec::new(
+                "cp",
+                vec![
+                    MatchedArg::new(0, ArgType::ReadableFile, "foo/bar")?,
+                    MatchedArg::new(1, ArgType::WriteableFile, "../baz")?,
+                ],
+                &["/bin/cp", "/usr/bin/cp"]
+            )
+        }),
+        policy.check(&cp)
+    );
+    Ok(())
+}
+
+#[test]
+fn test_cp_multiple_files() -> Result<()> {
+    let policy = setup();
+    let cp = ExecCall::new("cp", &["foo", "bar", "baz"]);
+    assert_eq!(
+        Ok(MatchedExec::Match {
+            exec: ValidExec::new(
+                "cp",
+                vec![
+                    MatchedArg::new(0, ArgType::ReadableFile, "foo")?,
+                    MatchedArg::new(1, ArgType::ReadableFile, "bar")?,
+                    MatchedArg::new(2, ArgType::WriteableFile, "baz")?,
+                ],
+                &["/bin/cp", "/usr/bin/cp"]
+            )
+        }),
+        policy.check(&cp)
+    );
+    Ok(())
+}
--- a/codex-rs/execpolicy/tests/good.rs
+++ b/codex-rs/execpolicy/tests/good.rs
@@ -0,0 +1,9 @@
+use codex_execpolicy::get_default_policy;
+use codex_execpolicy::PositiveExampleFailedCheck;
+
+#[test]
+fn verify_everything_in_good_list_is_allowed() {
+    let policy = get_default_policy().expect("failed to load default policy");
+    let violations = policy.check_each_good_list_individually();
+    assert_eq!(Vec::<PositiveExampleFailedCheck>::new(), violations);
+}
--- a/codex-rs/execpolicy/tests/head.rs
+++ b/codex-rs/execpolicy/tests/head.rs
@@ -0,0 +1,132 @@
+use codex_execpolicy::get_default_policy;
+use codex_execpolicy::ArgMatcher;
+use codex_execpolicy::ArgType;
+use codex_execpolicy::Error;
+use codex_execpolicy::ExecCall;
+use codex_execpolicy::MatchedArg;
+use codex_execpolicy::MatchedExec;
+use codex_execpolicy::MatchedOpt;
+use codex_execpolicy::Policy;
+use codex_execpolicy::Result;
+use codex_execpolicy::ValidExec;
+
+extern crate codex_execpolicy;
+
+fn setup() -> Policy {
+    get_default_policy().expect("failed to load default policy")
+}
+
+#[test]
+fn test_head_no_args() {
+    let policy = setup();
+    let head = ExecCall::new("head", &[]);
+    // It is actually valid to call `head` without arguments: it will read from
+    // stdin instead of from a file. Though recall that a command rejected by
+    // the policy is not "unsafe": it just means that this library cannot
+    // *guarantee* that the command is safe.
+    //
+    // If we start verifying individual components of a shell command, such as:
+    // `find . -name '*.rs' | head -n 10`, then it might be important to allow the
+    // no-arg case.
+    assert_eq!(
+        Err(Error::VarargMatcherDidNotMatchAnything {
+            program: "head".to_string(),
+            matcher: ArgMatcher::ReadableFiles,
+        }),
+        policy.check(&head)
+    )
+}
+
+#[test]
+fn test_head_one_file_no_flags() -> Result<()> {
+    let policy = setup();
+    let head = ExecCall::new("head", &["src/extension.ts"]);
+    assert_eq!(
+        Ok(MatchedExec::Match {
+            exec: ValidExec::new(
+                "head",
+                vec![MatchedArg::new(
+                    0,
+                    ArgType::ReadableFile,
+                    "src/extension.ts"
+                )?],
+                &["/bin/head", "/usr/bin/head"]
+            )
+        }),
+        policy.check(&head)
+    );
+    Ok(())
+}
+
+#[test]
+fn test_head_one_flag_one_file() -> Result<()> {
+    let policy = setup();
+    let head = ExecCall::new("head", &["-n", "100", "src/extension.ts"]);
+    assert_eq!(
+        Ok(MatchedExec::Match {
+            exec: ValidExec {
+                program: "head".to_string(),
+                flags: vec![],
+                opts: vec![MatchedOpt::new("-n", "100", ArgType::PositiveInteger).unwrap()],
+                args: vec![MatchedArg::new(
+                    2,
+                    ArgType::ReadableFile,
+                    "src/extension.ts"
+                )?],
+                system_path: vec!["/bin/head".to_string(), "/usr/bin/head".to_string()],
+            }
+        }),
+        policy.check(&head)
+    );
+    Ok(())
+}
+
+#[test]
+fn test_head_invalid_n_as_0() {
+    let policy = setup();
+    let head = ExecCall::new("head", &["-n", "0", "src/extension.ts"]);
+    assert_eq!(
+        Err(Error::InvalidPositiveInteger {
+            value: "0".to_string(),
+        }),
+        policy.check(&head)
+    )
+}
+
+#[test]
+fn test_head_invalid_n_as_nonint_float() {
+    let policy = setup();
+    let head = ExecCall::new("head", &["-n", "1.5", "src/extension.ts"]);
+    assert_eq!(
+        Err(Error::InvalidPositiveInteger {
+            value: "1.5".to_string(),
+        }),
+        policy.check(&head)
+    )
+}
+
+#[test]
+fn test_head_invalid_n_as_float() {
+    let policy = setup();
+    let head = ExecCall::new("head", &["-n", "1.0", "src/extension.ts"]);
+    assert_eq!(
+        Err(Error::InvalidPositiveInteger {
+            value: "1.0".to_string(),
+        }),
+        policy.check(&head)
+    )
+}
+
+#[test]
+fn test_head_invalid_n_as_negative_int() {
+    let policy = setup();
+    let head = ExecCall::new("head", &["-n", "-1", "src/extension.ts"]);
+    assert_eq!(
+        Err(Error::OptionFollowedByOptionInsteadOfValue {
+            program: "head".to_string(),
+            option: "-n".to_string(),
+            value: "-1".to_string(),
+        }),
+        policy.check(&head)
+    )
+}
--- a/codex-rs/execpolicy/tests/literal.rs
+++ b/codex-rs/execpolicy/tests/literal.rs
@@ -0,0 +1,50 @@
+use codex_execpolicy::ArgType;
+use codex_execpolicy::Error;
+use codex_execpolicy::ExecCall;
+use codex_execpolicy::MatchedArg;
+use codex_execpolicy::MatchedExec;
+use codex_execpolicy::PolicyParser;
+use codex_execpolicy::Result;
+use codex_execpolicy::ValidExec;
+
+extern crate codex_execpolicy;
+
+#[test]
+fn test_invalid_subcommand() -> Result<()> {
+    let unparsed_policy = r#"
+define_program(
+    program="fake_executable",
+    args=["subcommand", "sub-subcommand"],
+)
+"#;
+    let parser = PolicyParser::new("test_invalid_subcommand", unparsed_policy);
+    let policy = parser.parse().expect("failed to parse policy");
+    let valid_call = ExecCall::new("fake_executable", &["subcommand", "sub-subcommand"]);
+    assert_eq!(
+        Ok(MatchedExec::Match {
+            exec: ValidExec::new(
+                "fake_executable",
+                vec![
+                    MatchedArg::new(0, ArgType::Literal("subcommand".to_string()), "subcommand")?,
+                    MatchedArg::new(
+                        1,
+                        ArgType::Literal("sub-subcommand".to_string()),
+                        "sub-subcommand"
+                    )?,
+                ],
+                &[]
+            )
+        }),
+        policy.check(&valid_call)
+    );
+
+    let invalid_call = ExecCall::new("fake_executable", &["subcommand", "not-a-real-subcommand"]);
+    assert_eq!(
+        Err(Error::LiteralValueDidNotMatch {
+            expected: "sub-subcommand".to_string(),
+            actual: "not-a-real-subcommand".to_string()
+        }),
+        policy.check(&invalid_call)
+    );
+    Ok(())
+}
--- a/codex-rs/execpolicy/tests/ls.rs
+++ b/codex-rs/execpolicy/tests/ls.rs
@@ -0,0 +1,166 @@
+extern crate codex_execpolicy;
+
+use codex_execpolicy::get_default_policy;
+use codex_execpolicy::ArgType;
+use codex_execpolicy::Error;
+use codex_execpolicy::ExecCall;
+use codex_execpolicy::MatchedArg;
+use codex_execpolicy::MatchedExec;
+use codex_execpolicy::MatchedFlag;
+use codex_execpolicy::Policy;
+use codex_execpolicy::Result;
+use codex_execpolicy::ValidExec;
+
+fn setup() -> Policy {
+    get_default_policy().expect("failed to load default policy")
+}
+
+#[test]
+fn test_ls_no_args() {
+    let policy = setup();
+    let ls = ExecCall::new("ls", &[]);
+    assert_eq!(
+        Ok(MatchedExec::Match {
+            exec: ValidExec::new("ls", vec![], &["/bin/ls", "/usr/bin/ls"])
+        }),
+        policy.check(&ls)
+    );
+}
+
+#[test]
+fn test_ls_dash_a_dash_l() {
+    let policy = setup();
+    let args = &["-a", "-l"];
+    let ls_a_l = ExecCall::new("ls", args);
+    assert_eq!(
+        Ok(MatchedExec::Match {
+            exec: ValidExec {
+                program: "ls".into(),
+                flags: vec![MatchedFlag::new("-a"), MatchedFlag::new("-l")],
+                system_path: ["/bin/ls".into(), "/usr/bin/ls".into()].into(),
+                ..Default::default()
+            }
+        }),
+        policy.check(&ls_a_l)
+    );
+}
+
+#[test]
+fn test_ls_dash_z() {
+    let policy = setup();
+
+    // -z is currently an invalid option for ls, but it has so many options,
+    // perhaps it will get added at some point...
+    let ls_z = ExecCall::new("ls", &["-z"]);
+    assert_eq!(
+        Err(Error::UnknownOption {
+            program: "ls".into(),
+            option: "-z".into()
+        }),
+        policy.check(&ls_z)
+    );
+}
+
+#[test]
+fn test_ls_dash_al() {
+    let policy = setup();
+
+    // This currently fails, but it should pass once option_bundling=True is implemented.
+    let ls_al = ExecCall::new("ls", &["-al"]);
+    assert_eq!(
+        Err(Error::UnknownOption {
+            program: "ls".into(),
+            option: "-al".into()
+        }),
+        policy.check(&ls_al)
+    );
+}
+
+#[test]
+fn test_ls_one_file_arg() -> Result<()> {
+    let policy = setup();
+
+    let ls_one_file_arg = ExecCall::new("ls", &["foo"]);
+    assert_eq!(
+        Ok(MatchedExec::Match {
+            exec: ValidExec::new(
+                "ls",
+                vec![MatchedArg::new(0, ArgType::ReadableFile, "foo")?],
+                &["/bin/ls", "/usr/bin/ls"]
+            )
+        }),
+        policy.check(&ls_one_file_arg)
+    );
+    Ok(())
+}
+
+#[test]
+fn test_ls_multiple_file_args() -> Result<()> {
+    let policy = setup();
+
+    let ls_multiple_file_args = ExecCall::new("ls", &["foo", "bar", "baz"]);
+    assert_eq!(
+        Ok(MatchedExec::Match {
+            exec: ValidExec::new(
+                "ls",
+                vec![
+                    MatchedArg::new(0, ArgType::ReadableFile, "foo")?,
+                    MatchedArg::new(1, ArgType::ReadableFile, "bar")?,
+                    MatchedArg::new(2, ArgType::ReadableFile, "baz")?,
+                ],
+                &["/bin/ls", "/usr/bin/ls"]
+            )
+        }),
+        policy.check(&ls_multiple_file_args)
+    );
+    Ok(())
+}
+
+#[test]
+fn test_ls_multiple_flags_and_file_args() -> Result<()> {
+    let policy = setup();
+
+    let ls_multiple_flags_and_file_args = ExecCall::new("ls", &["-l", "-a", "foo", "bar", "baz"]);
+    assert_eq!(
+        Ok(MatchedExec::Match {
+            exec: ValidExec {
+                program: "ls".into(),
+                flags: vec![MatchedFlag::new("-l"), MatchedFlag::new("-a")],
+                args: vec![
+                    MatchedArg::new(2, ArgType::ReadableFile, "foo")?,
+                    MatchedArg::new(3, ArgType::ReadableFile, "bar")?,
+                    MatchedArg::new(4, ArgType::ReadableFile, "baz")?,
+                ],
+                system_path: ["/bin/ls".into(), "/usr/bin/ls".into()].into(),
+                ..Default::default()
+            }
+        }),
+        policy.check(&ls_multiple_flags_and_file_args)
+    );
+    Ok(())
+}
+
+#[test]
+fn test_flags_after_file_args() -> Result<()> {
+    let policy = setup();
+
+    // TODO(mbolin): While this is "safe" in that it will not do anything bad
+    // to the user's machine, it will fail because apparently `ls` does not
+    // allow flags after file arguments (as some commands do). We should
+    // extend define_program() to make this part of the configuration so that
+    // this command is disallowed.
+    let ls_flags_after_file_args = ExecCall::new("ls", &["foo", "-l"]);
+    assert_eq!(
+        Ok(MatchedExec::Match {
+            exec: ValidExec {
+                program: "ls".into(),
+                flags: vec![MatchedFlag::new("-l")],
+                args: vec![MatchedArg::new(0, ArgType::ReadableFile, "foo")?],
+                system_path: ["/bin/ls".into(), "/usr/bin/ls".into()].into(),
+                ..Default::default()
+            }
+        }),
+        policy.check(&ls_flags_after_file_args)
+    );
+    Ok(())
+}
--- a/codex-rs/execpolicy/tests/parse_sed_command.rs
+++ b/codex-rs/execpolicy/tests/parse_sed_command.rs
@@ -0,0 +1,23 @@
+use codex_execpolicy::parse_sed_command;
+use codex_execpolicy::Error;
+
+#[test]
+fn parses_simple_print_command() {
+    assert_eq!(parse_sed_command("122,202p"), Ok(()));
+}
+
+#[test]
+fn rejects_malformed_print_command() {
+    assert_eq!(
+        parse_sed_command("122,202"),
+        Err(Error::SedCommandNotProvablySafe {
+            command: "122,202".to_string(),
+        })
+    );
+    assert_eq!(
+        parse_sed_command("122202"),
+        Err(Error::SedCommandNotProvablySafe {
+            command: "122202".to_string(),
+        })
+    );
+}
--- a/codex-rs/execpolicy/tests/pwd.rs
+++ b/codex-rs/execpolicy/tests/pwd.rs
@@ -0,0 +1,85 @@
+extern crate codex_execpolicy;
+
+use std::vec;
+
+use codex_execpolicy::get_default_policy;
+use codex_execpolicy::Error;
+use codex_execpolicy::ExecCall;
+use codex_execpolicy::MatchedExec;
+use codex_execpolicy::MatchedFlag;
+use codex_execpolicy::Policy;
+use codex_execpolicy::PositionalArg;
+use codex_execpolicy::ValidExec;
+
+fn setup() -> Policy {
+    get_default_policy().expect("failed to load default policy")
+}
+
+#[test]
+fn test_pwd_no_args() {
+    let policy = setup();
+    let pwd = ExecCall::new("pwd", &[]);
+    assert_eq!(
+        Ok(MatchedExec::Match {
+            exec: ValidExec {
+                program: "pwd".into(),
+                ..Default::default()
+            }
+        }),
+        policy.check(&pwd)
+    );
+}
+
+#[test]
+fn test_pwd_capital_l() {
+    let policy = setup();
+    let pwd = ExecCall::new("pwd", &["-L"]);
+    assert_eq!(
+        Ok(MatchedExec::Match {
+            exec: ValidExec {
+                program: "pwd".into(),
+                flags: vec![MatchedFlag::new("-L")],
+                ..Default::default()
+            }
+        }),
+        policy.check(&pwd)
+    );
+}
+
+#[test]
+fn test_pwd_capital_p() {
+    let policy = setup();
+    let pwd = ExecCall::new("pwd", &["-P"]);
+    assert_eq!(
+        Ok(MatchedExec::Match {
+            exec: ValidExec {
+                program: "pwd".into(),
+                flags: vec![MatchedFlag::new("-P")],
+                ..Default::default()
+            }
+        }),
+        policy.check(&pwd)
+    );
+}
+
+#[test]
+fn test_pwd_extra_args() {
+    let policy = setup();
+    let pwd = ExecCall::new("pwd", &["foo", "bar"]);
+    assert_eq!(
+        Err(Error::UnexpectedArguments {
+            program: "pwd".to_string(),
+            args: vec![
+                PositionalArg {
+                    index: 0,
+                    value: "foo".to_string()
+                },
+                PositionalArg {
+                    index: 1,
+                    value: "bar".to_string()
+                },
+            ],
+        }),
+        policy.check(&pwd)
+    );
+}
--- a/codex-rs/execpolicy/tests/sed.rs
+++ b/codex-rs/execpolicy/tests/sed.rs
@@ -0,0 +1,83 @@
+extern crate codex_execpolicy;
+
+use codex_execpolicy::get_default_policy;
+use codex_execpolicy::ArgType;
+use codex_execpolicy::Error;
+use codex_execpolicy::ExecCall;
+use codex_execpolicy::MatchedArg;
+use codex_execpolicy::MatchedExec;
+use codex_execpolicy::MatchedFlag;
+use codex_execpolicy::MatchedOpt;
+use codex_execpolicy::Policy;
+use codex_execpolicy::Result;
+use codex_execpolicy::ValidExec;
+
+fn setup() -> Policy {
+    get_default_policy().expect("failed to load default policy")
+}
+
+#[test]
+fn test_sed_print_specific_lines() -> Result<()> {
+    let policy = setup();
+    let sed = ExecCall::new("sed", &["-n", "122,202p", "hello.txt"]);
+    assert_eq!(
+        Ok(MatchedExec::Match {
+            exec: ValidExec {
+                program: "sed".to_string(),
+                flags: vec![MatchedFlag::new("-n")],
+                args: vec![
+                    MatchedArg::new(1, ArgType::SedCommand, "122,202p")?,
+                    MatchedArg::new(2, ArgType::ReadableFile, "hello.txt")?,
+                ],
+                system_path: vec!["/usr/bin/sed".to_string()],
+                ..Default::default()
+            }
+        }),
+        policy.check(&sed)
+    );
+    Ok(())
+}
+
+#[test]
+fn test_sed_print_specific_lines_with_e_flag() -> Result<()> {
+    let policy = setup();
+    let sed = ExecCall::new("sed", &["-n", "-e", "122,202p", "hello.txt"]);
+    assert_eq!(
+        Ok(MatchedExec::Match {
+            exec: ValidExec {
+                program: "sed".to_string(),
+                flags: vec![MatchedFlag::new("-n")],
+                opts: vec![MatchedOpt::new("-e", "122,202p", ArgType::SedCommand).unwrap()],
+                args: vec![MatchedArg::new(3, ArgType::ReadableFile, "hello.txt")?],
+                system_path: vec!["/usr/bin/sed".to_string()],
+            }
+        }),
+        policy.check(&sed)
+    );
+    Ok(())
+}
+
+#[test]
+fn test_sed_reject_dangerous_command() {
+    let policy = setup();
+    let sed = ExecCall::new("sed", &["-e", "s/y/echo hi/e", "hello.txt"]);
+    assert_eq!(
+        Err(Error::SedCommandNotProvablySafe {
+            command: "s/y/echo hi/e".to_string(),
+        }),
+        policy.check(&sed)
+    );
+}
+
+#[test]
+fn test_sed_verify_e_or_pattern_is_required() {
+    let policy = setup();
+    let sed = ExecCall::new("sed", &["122,202p"]);
+    assert_eq!(
+        Err(Error::MissingRequiredOptions {
+            program: "sed".to_string(),
+            options: vec!["-e".to_string()],
+        }),
+        policy.check(&sed)
+    );
+}