feat: introduce codex_execpolicy crate for defining "safe" commands (#634)

As described in detail in `codex-rs/execpolicy/README.md` introduced in
this PR, `execpolicy` is a tool that lets you define a set of _patterns_
used to match [`execv(3)`](https://linux.die.net/man/3/execv)
invocations. When a pattern is matched, `execpolicy` returns the parsed
version in a structured form that is amenable to static analysis.

The primary use case is to define patterns that match commands that should be
auto-approved by a tool such as Codex. This supports a richer pattern
matching mechanism than the sort of prefix-matching we have done to
date, e.g.:


5e40d9d221/codex-cli/src/approvals.ts (L333-L354)

Note we are still playing with the API and the `system_path` option in
particular still needs some work.
This commit is contained in:
Michael Bolin
2025-04-24 17:14:47 -07:00
committed by GitHub
parent 5e40d9d221
commit 58f0e5ab74
29 changed files with 3830 additions and 47 deletions

View File

@@ -0,0 +1,118 @@
#![allow(clippy::needless_lifetimes)]
use crate::arg_type::ArgType;
use crate::starlark::values::ValueLike;
use allocative::Allocative;
use derive_more::derive::Display;
use starlark::any::ProvidesStaticType;
use starlark::values::starlark_value;
use starlark::values::string::StarlarkStr;
use starlark::values::AllocValue;
use starlark::values::Heap;
use starlark::values::NoSerialize;
use starlark::values::StarlarkValue;
use starlark::values::UnpackValue;
use starlark::values::Value;
/// Patterns that lists of arguments should be compared against.
///
/// The number of arguments each variant consumes is reported by
/// `ArgMatcher::cardinality`, and the `ArgType` attributed to each matched
/// argument is reported by `ArgMatcher::arg_type`.
#[derive(Clone, Debug, Display, Eq, PartialEq, NoSerialize, ProvidesStaticType, Allocative)]
// NOTE(review): this formats `self` from inside its own `Display` impl; confirm
// that derive_more does not expand it into a self-recursive call.
#[display("{}", self)]
pub enum ArgMatcher {
/// Literal string value.
Literal(String),
/// We cannot say what type of value this should match, but it is *not* a file path.
OpaqueNonFile,
/// Required readable file.
ReadableFile,
/// Required writeable file.
WriteableFile,
/// Non-empty list of readable files.
ReadableFiles,
/// Non-empty list of readable files, or empty list, implying readable cwd.
ReadableFilesOrCwd,
/// Positive integer, like one that is required for `head -n`.
PositiveInteger,
/// Bespoke matcher for safe sed commands.
SedCommand,
/// Matches an arbitrary number of arguments without attributing any
/// particular meaning to them. Caller is responsible for interpreting them.
UnverifiedVarargs,
}
impl ArgMatcher {
    /// Reports how many positional arguments this matcher consumes: exactly
    /// one, at least one, or any number (including zero).
    pub fn cardinality(&self) -> ArgMatcherCardinality {
        match self {
            Self::ReadableFiles => ArgMatcherCardinality::AtLeastOne,
            Self::ReadableFilesOrCwd | Self::UnverifiedVarargs => {
                ArgMatcherCardinality::ZeroOrMore
            }
            Self::Literal(_)
            | Self::OpaqueNonFile
            | Self::ReadableFile
            | Self::WriteableFile
            | Self::PositiveInteger
            | Self::SedCommand => ArgMatcherCardinality::One,
        }
    }

    /// Returns the `ArgType` attributed to each individual argument matched
    /// by this matcher. The multi-file matchers type every matched argument
    /// as a single `ArgType::ReadableFile`, and `UnverifiedVarargs` yields
    /// `ArgType::Unknown`.
    pub fn arg_type(&self) -> ArgType {
        match self {
            Self::Literal(value) => ArgType::Literal(value.clone()),
            Self::ReadableFile | Self::ReadableFiles | Self::ReadableFilesOrCwd => {
                ArgType::ReadableFile
            }
            Self::WriteableFile => ArgType::WriteableFile,
            Self::OpaqueNonFile => ArgType::OpaqueNonFile,
            Self::PositiveInteger => ArgType::PositiveInteger,
            Self::SedCommand => ArgType::SedCommand,
            Self::UnverifiedVarargs => ArgType::Unknown,
        }
    }
}
/// How many positional arguments a matcher consumes.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ArgMatcherCardinality {
    /// Exactly one argument.
    One,
    /// One or more arguments.
    AtLeastOne,
    /// Any number of arguments, including none.
    ZeroOrMore,
}

impl ArgMatcherCardinality {
    /// Returns `Some(n)` when this cardinality always consumes exactly `n`
    /// arguments, or `None` when the number of arguments is variable.
    pub fn is_exact(&self) -> Option<usize> {
        match self {
            Self::One => Some(1),
            Self::AtLeastOne | Self::ZeroOrMore => None,
        }
    }
}
// Allows an `ArgMatcher` to be turned into a Starlark `Value`.
impl<'v> AllocValue<'v> for ArgMatcher {
/// Allocates this matcher on `heap` with `Heap::alloc_simple` and returns
/// the resulting value.
fn alloc_value(self, heap: &'v Heap) -> Value<'v> {
heap.alloc_simple(self)
}
}
// Registers `ArgMatcher` as a Starlark value whose type name is "ArgMatcher".
#[starlark_value(type = "ArgMatcher")]
impl<'v> StarlarkValue<'v> for ArgMatcher {
type Canonical = ArgMatcher;
}
impl<'v> UnpackValue<'v> for ArgMatcher {
    type Error = starlark::Error;

    /// Unpacks a Starlark value into an `ArgMatcher`.
    ///
    /// A Starlark string becomes `ArgMatcher::Literal` holding a copy of the
    /// string. Any other value is downcast to `ArgMatcher` and cloned, and
    /// `None` is produced when that downcast fails.
    fn unpack_value_impl(value: Value<'v>) -> starlark::Result<Option<Self>> {
        let unpacked = match value.downcast_ref::<StarlarkStr>() {
            Some(literal) => Some(ArgMatcher::Literal(literal.as_str().to_string())),
            None => value.downcast_ref::<ArgMatcher>().cloned(),
        };
        Ok(unpacked)
    }
}

View File

@@ -0,0 +1,194 @@
use serde::Serialize;
use crate::arg_matcher::ArgMatcher;
use crate::arg_matcher::ArgMatcherCardinality;
use crate::error::Error;
use crate::error::Result;
use crate::valid_exec::MatchedArg;
/// An argument value paired with an index.
///
/// `resolve_observed_args_with_patterns` forwards `index` and `value`
/// unchanged to `MatchedArg::new`.
// NOTE(review): `index` is presumably the argument's position in the original
// argument list - confirm against the code that constructs these values.
#[derive(Clone, Debug, Eq, PartialEq, Serialize)]
pub struct PositionalArg {
pub index: usize,
pub value: String,
}
pub fn resolve_observed_args_with_patterns(
program: &str,
args: Vec<PositionalArg>,
arg_patterns: &Vec<ArgMatcher>,
) -> Result<Vec<MatchedArg>> {
// Naive matching implementation. Among `arg_patterns`, there is allowed to
// be at most one vararg pattern. Assuming `arg_patterns` is non-empty, we
// end up with either:
//
// - all `arg_patterns` in `prefix_patterns`
// - `arg_patterns` split across `prefix_patterns` (which could be empty),
// one `vararg_pattern`, and `suffix_patterns` (which could also empty).
//
// From there, we start by matching everything in `prefix_patterns`.
// Then we calculate how many positional args should be matched by
// `suffix_patterns` and use that to determine how many args are left to
// be matched by `vararg_pattern` (which could be zero).
//
// After assocating positional args with `vararg_pattern`, we match the
// `suffix_patterns` with the remaining args.
let ParitionedArgs {
num_prefix_args,
num_suffix_args,
prefix_patterns,
suffix_patterns,
vararg_pattern,
} = partition_args(program, arg_patterns)?;
let mut matched_args = Vec::<MatchedArg>::new();
let prefix = get_range_checked(&args, 0..num_prefix_args)?;
let mut prefix_arg_index = 0;
for pattern in prefix_patterns {
let n = pattern.cardinality().is_exact().unwrap();
for positional_arg in &prefix[prefix_arg_index..prefix_arg_index + n] {
let matched_arg = MatchedArg::new(
positional_arg.index,
pattern.arg_type(),
&positional_arg.value.clone(),
)?;
matched_args.push(matched_arg);
}
prefix_arg_index += n;
}
if num_suffix_args > args.len() {
return Err(Error::NotEnoughArgs {
program: program.to_string(),
args,
arg_patterns: arg_patterns.clone(),
});
}
let initial_suffix_args_index = args.len() - num_suffix_args;
if prefix_arg_index > initial_suffix_args_index {
return Err(Error::PrefixOverlapsSuffix {});
}
if let Some(pattern) = vararg_pattern {
let vararg = get_range_checked(&args, prefix_arg_index..initial_suffix_args_index)?;
match pattern.cardinality() {
ArgMatcherCardinality::One => {
return Err(Error::InternalInvariantViolation {
message: "vararg pattern should not have cardinality of one".to_string(),
});
}
ArgMatcherCardinality::AtLeastOne => {
if vararg.is_empty() {
return Err(Error::VarargMatcherDidNotMatchAnything {
program: program.to_string(),
matcher: pattern,
});
} else {
for positional_arg in vararg {
let matched_arg = MatchedArg::new(
positional_arg.index,
pattern.arg_type(),
&positional_arg.value.clone(),
)?;
matched_args.push(matched_arg);
}
}
}
ArgMatcherCardinality::ZeroOrMore => {
for positional_arg in vararg {
let matched_arg = MatchedArg::new(
positional_arg.index,
pattern.arg_type(),
&positional_arg.value.clone(),
)?;
matched_args.push(matched_arg);
}
}
}
}
let suffix = get_range_checked(&args, initial_suffix_args_index..args.len())?;
let mut suffix_arg_index = 0;
for pattern in suffix_patterns {
let n = pattern.cardinality().is_exact().unwrap();
for positional_arg in &suffix[suffix_arg_index..suffix_arg_index + n] {
let matched_arg = MatchedArg::new(
positional_arg.index,
pattern.arg_type(),
&positional_arg.value.clone(),
)?;
matched_args.push(matched_arg);
}
suffix_arg_index += n;
}
if matched_args.len() < args.len() {
let extra_args = get_range_checked(&args, matched_args.len()..args.len())?;
Err(Error::UnexpectedArguments {
program: program.to_string(),
args: extra_args.to_vec(),
})
} else {
Ok(matched_args)
}
}
/// Result of splitting a list of arg patterns around its (at most one)
/// variable-cardinality pattern; produced by `partition_args`.
// NOTE(review): the name is misspelled ("Paritioned" for "Partitioned");
// renaming it means updating every use site in this file.
#[derive(Default)]
struct ParitionedArgs {
/// Total number of args consumed by `prefix_patterns`.
num_prefix_args: usize,
/// Total number of args consumed by `suffix_patterns`.
num_suffix_args: usize,
/// Fixed-cardinality patterns that appear before the vararg pattern (or all
/// patterns when there is no vararg pattern).
prefix_patterns: Vec<ArgMatcher>,
/// Fixed-cardinality patterns that appear after the vararg pattern.
suffix_patterns: Vec<ArgMatcher>,
/// The single pattern without an exact cardinality, if any.
vararg_pattern: Option<ArgMatcher>,
}
/// Splits `arg_patterns` into the fixed-cardinality patterns before the
/// vararg pattern, the vararg pattern itself (if any), and the
/// fixed-cardinality patterns after it, counting how many args each
/// fixed-cardinality group consumes.
///
/// # Errors
///
/// Returns `Error::MultipleVarargPatterns` if more than one pattern lacks an
/// exact cardinality. `program` is only used to populate that error.
fn partition_args(program: &str, arg_patterns: &[ArgMatcher]) -> Result<ParitionedArgs> {
    let mut partitioned_args = ParitionedArgs::default();
    for pattern in arg_patterns {
        match pattern.cardinality().is_exact() {
            // No vararg pattern seen yet: still in the prefix.
            Some(n) if partitioned_args.vararg_pattern.is_none() => {
                partitioned_args.prefix_patterns.push(pattern.clone());
                partitioned_args.num_prefix_args += n;
            }
            Some(n) => {
                partitioned_args.suffix_patterns.push(pattern.clone());
                partitioned_args.num_suffix_args += n;
            }
            None => {
                if let Some(first) = partitioned_args.vararg_pattern.take() {
                    return Err(Error::MultipleVarargPatterns {
                        program: program.to_string(),
                        first,
                        second: pattern.clone(),
                    });
                }
                partitioned_args.vararg_pattern = Some(pattern.clone());
            }
        }
    }
    Ok(partitioned_args)
}
fn get_range_checked<T>(vec: &[T], range: std::ops::Range<usize>) -> Result<&[T]> {
if range.start > range.end {
Err(Error::RangeStartExceedsEnd {
start: range.start,
end: range.end,
})
} else if range.end > vec.len() {
Err(Error::RangeEndOutOfBounds {
end: range.end,
len: vec.len(),
})
} else {
Ok(&vec[range])
}
}

View File

@@ -0,0 +1,87 @@
#![allow(clippy::needless_lifetimes)]
use crate::error::Error;
use crate::error::Result;
use crate::sed_command::parse_sed_command;
use allocative::Allocative;
use derive_more::derive::Display;
use serde::Serialize;
use starlark::any::ProvidesStaticType;
use starlark::values::starlark_value;
use starlark::values::StarlarkValue;
/// The type attributed to an individual argument or option value.
///
/// `ArgType::validate` checks a concrete string against the type, and
/// `ArgType::might_write_file` reports whether a value of this type could
/// name a file that gets written.
#[derive(Debug, Clone, Display, Eq, PartialEq, ProvidesStaticType, Allocative, Serialize)]
// NOTE(review): this formats `self` from inside its own `Display` impl; confirm
// that derive_more does not expand it into a self-recursive call.
#[display("{}", self)]
pub enum ArgType {
/// The value must be exactly equal to this string.
Literal(String),
/// We cannot say what this argument represents, but it is *not* a file path.
OpaqueNonFile,
/// A file (or directory) that can be expected to be read as part of this command.
ReadableFile,
/// A file (or directory) that can be expected to be written as part of this command.
WriteableFile,
/// Positive integer, like one that is required for `head -n`.
PositiveInteger,
/// Bespoke arg type for a safe sed command.
SedCommand,
/// Type is unknown: it may or may not be a file.
Unknown,
}
impl ArgType {
pub fn validate(&self, value: &str) -> Result<()> {
match self {
ArgType::Literal(literal_value) => {
if value != *literal_value {
Err(Error::LiteralValueDidNotMatch {
expected: literal_value.clone(),
actual: value.to_string(),
})
} else {
Ok(())
}
}
ArgType::ReadableFile => {
if value.is_empty() {
Err(Error::EmptyFileName {})
} else {
Ok(())
}
}
ArgType::WriteableFile => {
if value.is_empty() {
Err(Error::EmptyFileName {})
} else {
Ok(())
}
}
ArgType::OpaqueNonFile | ArgType::Unknown => Ok(()),
ArgType::PositiveInteger => match value.parse::<u64>() {
Ok(0) => Err(Error::InvalidPositiveInteger {
value: value.to_string(),
}),
Ok(_) => Ok(()),
Err(_) => Err(Error::InvalidPositiveInteger {
value: value.to_string(),
}),
},
ArgType::SedCommand => parse_sed_command(value),
}
}
pub fn might_write_file(&self) -> bool {
match self {
ArgType::WriteableFile | ArgType::Unknown => true,
ArgType::Literal(_)
| ArgType::OpaqueNonFile
| ArgType::PositiveInteger
| ArgType::ReadableFile
| ArgType::SedCommand => false,
}
}
}
// Registers `ArgType` as a Starlark value whose type name is "ArgType".
#[starlark_value(type = "ArgType")]
impl<'v> StarlarkValue<'v> for ArgType {
type Canonical = ArgType;
}

View File

@@ -0,0 +1,202 @@
"""
define_program() supports the following arguments:
- program: the name of the program
- system_path: list of absolute paths on the system where program can likely be found
- option_bundling (PLANNED): whether to allow bundling of options (e.g. `-al` for `-a -l`)
- combine_format (PLANNED): whether to allow `--option=value` (as opposed to `--option value`)
- options: the command-line flags/options: use flag() and opt() to define these
- args: the rules for what arguments are allowed that are not "options"
- should_match: list of command-line invocations that should be matched by the rule
- should_not_match: list of command-line invocations that should not be matched by the rule
"""
# `ls`: only the -1, -a and -l flags are allowed.
# NOTE(review): ARG_RFILES_OR_CWD presumably corresponds to the "readable
# files, or none (implying a readable cwd)" matcher - confirm in the parser.
define_program(
program="ls",
system_path=["/bin/ls", "/usr/bin/ls"],
options=[
flag("-1"),
flag("-a"),
flag("-l"),
],
args=[ARG_RFILES_OR_CWD],
)
# `cat`: only the -b, -n and -t flags are allowed, and at least one file
# argument is expected (an empty argument list is listed in should_not_match).
define_program(
program="cat",
options=[
flag("-b"),
flag("-n"),
flag("-t"),
],
system_path=["/bin/cat", "/usr/bin/cat"],
args=[ARG_RFILES],
should_match=[
["file.txt"],
["-n", "file.txt"],
["-b", "file.txt"],
],
should_not_match=[
# While cat without args is valid, it will read from stdin, which
# does not seem appropriate for our current use case.
[],
# Let's not auto-approve advisory locking.
["-l", "file.txt"],
]
)
# `cp`: the args are ARG_RFILES followed by ARG_WFILE, so a single argument is
# not enough (see should_not_match).
define_program(
program="cp",
options=[
flag("-r"),
flag("-R"),
flag("--recursive"),
],
args=[ARG_RFILES, ARG_WFILE],
system_path=["/bin/cp", "/usr/bin/cp"],
should_match=[
["foo", "bar"],
],
should_not_match=[
["foo"],
],
)
# `head`: -c and -n each take a value typed ARG_POS_INT; the remaining
# arguments are typed ARG_RFILES.
define_program(
program="head",
system_path=["/bin/head", "/usr/bin/head"],
options=[
opt("-c", ARG_POS_INT),
opt("-n", ARG_POS_INT),
],
args=[ARG_RFILES],
)
# `printenv` is defined twice below (zero args, and exactly one arg); both
# definitions share this system_path.
printenv_system_path = ["/usr/bin/printenv"]
# Print all environment variables.
define_program(
program="printenv",
args=[],
system_path=printenv_system_path,
# This variant of `printenv` only allows zero args.
should_match=[[]],
should_not_match=[["PATH"]],
)
# Print a specific environment variable.
define_program(
program="printenv",
args=[ARG_OPAQUE_VALUE],
system_path=printenv_system_path,
# This variant of `printenv` only allows exactly one arg.
should_match=[["PATH"]],
should_not_match=[[], ["PATH", "HOME"]],
)
# Note that `pwd` is generally implemented as a shell built-in. It does not
# accept any arguments.
# Only the -L and -P flags are allowed, and no system_path is given.
define_program(
program="pwd",
options=[
flag("-L"),
flag("-P"),
],
args=[],
)
# `rg`: the first positional argument is typed ARG_OPAQUE_VALUE and is followed
# by ARG_RFILES_OR_CWD. Options that take a value are declared with opt();
# plain switches with flag().
define_program(
program="rg",
options=[
opt("-A", ARG_POS_INT),
opt("-B", ARG_POS_INT),
opt("-C", ARG_POS_INT),
opt("-d", ARG_POS_INT),
opt("--max-depth", ARG_POS_INT),
opt("-g", ARG_OPAQUE_VALUE),
opt("--glob", ARG_OPAQUE_VALUE),
opt("-m", ARG_POS_INT),
opt("--max-count", ARG_POS_INT),
flag("-n"),
flag("-i"),
flag("-l"),
flag("--files"),
flag("--files-with-matches"),
flag("--files-without-match"),
],
args=[ARG_OPAQUE_VALUE, ARG_RFILES_OR_CWD],
should_match=[
["-n", "init"],
["-n", "init", "."],
["-i", "-n", "init", "src"],
["--files", "--max-depth", "2", "."],
],
should_not_match=[
["-m", "-n", "init"],
["--glob", "src"],
],
# TODO(mbolin): Perhaps we need a way to indicate that we expect `rg` to be
# bundled with the host environment and we should be using that version.
system_path=[],
)
# Unfortunately, `sed` is difficult to secure because GNU sed supports an `e`
# flag where `s/pattern/replacement/e` would run `replacement` as a shell
# command every time `pattern` is matched. For example, try the following on
# Ubuntu (which uses GNU sed, unlike macOS):
#
# ```shell
# $ yes | head -n 4 > /tmp/yes.txt
# $ sed 's/y/echo hi/e' /tmp/yes.txt
# hi
# hi
# hi
# hi
# ```
#
# As you can see, `echo hi` got executed four times. In order to support some
# basic sed functionality, we implement a bespoke `ARG_SED_COMMAND` that matches
# only "known safe" sed commands.
#
# `sed` is defined twice below: once where the command is the first positional
# argument, and once where it is supplied through a required `-e` option.
# Both definitions share the flags and system_path declared here.
common_sed_flags = [
# We deliberately do not support -i or -f.
flag("-n"),
flag("-u"),
]
sed_system_path = ["/usr/bin/sed"]
# When -e is not specified, the first argument must be a valid sed command.
define_program(
program="sed",
options=common_sed_flags,
args=[ARG_SED_COMMAND, ARG_RFILES],
system_path=sed_system_path,
)
# When -e is required, all arguments are assumed to be readable files.
define_program(
program="sed",
options=common_sed_flags + [
opt("-e", ARG_SED_COMMAND, required=True),
],
args=[ARG_RFILES],
system_path=sed_system_path,
)
# `which`: only the -a and -s flags are allowed, and at least one argument is
# expected (an empty argument list is listed in should_not_match).
# NOTE(review): the arguments to `which` are program names rather than file
# paths, yet they are typed ARG_RFILES here - confirm this is intended.
define_program(
program="which",
options=[
flag("-a"),
flag("-s"),
],
# Surprisingly, `which` takes more than one argument.
args=[ARG_RFILES],
should_match=[
["python3"],
["-a", "python3"],
["-a", "python3", "cargo"],
],
should_not_match=[
[],
],
system_path=["/bin/which", "/usr/bin/which"],
)

View File

@@ -0,0 +1,96 @@
use std::path::PathBuf;
use serde::Serialize;
use crate::arg_matcher::ArgMatcher;
use crate::arg_resolver::PositionalArg;
use serde_with::serde_as;
use serde_with::DisplayFromStr;
/// Crate-wide result type whose error is [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
/// Errors reported while matching or checking an exec call.
///
/// Serialized with serde as an internally tagged enum: the variant name is
/// stored under the `"type"` key.
#[serde_as]
#[derive(Debug, Eq, PartialEq, Serialize)]
#[serde(tag = "type")]
pub enum Error {
/// Initial error used by `Policy::check`; returned when no spec for
/// `program` produced a different result.
NoSpecForProgram {
program: String,
},
OptionMissingValue {
program: String,
option: String,
},
OptionFollowedByOptionInsteadOfValue {
program: String,
option: String,
value: String,
},
UnknownOption {
program: String,
option: String,
},
/// Positional args were left over after all arg patterns were matched.
UnexpectedArguments {
program: String,
args: Vec<PositionalArg>,
},
DoubleDashNotSupportedYet {
program: String,
},
/// The arg patterns contain more than one pattern without an exact
/// cardinality.
MultipleVarargPatterns {
program: String,
first: ArgMatcher,
second: ArgMatcher,
},
/// A checked slice range had `start > end`.
RangeStartExceedsEnd {
start: usize,
end: usize,
},
/// A checked slice range had `end` greater than the slice length `len`.
RangeEndOutOfBounds {
end: usize,
len: usize,
},
/// The args consumed by the prefix patterns run past the point where the
/// args reserved for the suffix patterns begin.
PrefixOverlapsSuffix {},
/// The suffix patterns need more args than were supplied.
NotEnoughArgs {
program: String,
args: Vec<PositionalArg>,
arg_patterns: Vec<ArgMatcher>,
},
InternalInvariantViolation {
message: String,
},
/// A vararg pattern that needs at least one arg was left with none.
VarargMatcherDidNotMatchAnything {
program: String,
matcher: ArgMatcher,
},
/// A readable or writeable file argument was the empty string.
EmptyFileName {},
/// A value did not equal the literal it was required to match.
LiteralValueDidNotMatch {
expected: String,
actual: String,
},
/// A value did not parse as an integer greater than zero.
InvalidPositiveInteger {
value: String,
},
MissingRequiredOptions {
program: String,
options: Vec<String>,
},
SedCommandNotProvablySafe {
command: String,
},
/// `file` does not start with any of the readable `folders`.
ReadablePathNotInReadableFolders {
file: PathBuf,
folders: Vec<PathBuf>,
},
/// `file` does not start with any of the writeable `folders`.
WriteablePathNotInWriteableFolders {
file: PathBuf,
folders: Vec<PathBuf>,
},
/// `file` is relative and no cwd was supplied to resolve it against.
CannotCheckRelativePath {
file: PathBuf,
},
/// Turning `file` into an absolute path failed with an I/O error of kind
/// `error`.
CannotCanonicalizePath {
file: String,
// `std::io::ErrorKind` is serialized via its `Display` output.
#[serde_as(as = "DisplayFromStr")]
error: std::io::ErrorKind,
},
}

View File

@@ -0,0 +1,28 @@
use std::fmt::Display;
use serde::Serialize;
/// A program together with the list of arguments it is invoked with.
#[derive(Clone, Debug, Eq, PartialEq, Serialize)]
pub struct ExecCall {
/// The program to run.
pub program: String,
/// The arguments passed to `program`.
pub args: Vec<String>,
}
impl ExecCall {
pub fn new(program: &str, args: &[&str]) -> Self {
Self {
program: program.to_string(),
args: args.iter().map(|&s| s.into()).collect(),
}
}
}
impl Display for ExecCall {
    /// Writes the program followed by each argument, separated by single
    /// spaces. No quoting or escaping is applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.program)?;
        self.args.iter().try_for_each(|arg| write!(f, " {}", arg))
    }
}

View File

@@ -0,0 +1,263 @@
use std::ffi::OsString;
use std::path::Path;
use std::path::PathBuf;
use crate::ArgType;
use crate::Error::CannotCanonicalizePath;
use crate::Error::CannotCheckRelativePath;
use crate::Error::ReadablePathNotInReadableFolders;
use crate::Error::WriteablePathNotInWriteableFolders;
use crate::ExecCall;
use crate::MatchedExec;
use crate::Policy;
use crate::Result;
use crate::ValidExec;
use path_absolutize::*;
use std::os::unix::fs::PermissionsExt;
// Returns early from the enclosing function with
// `Err($error { file, folders })` unless `$file` starts with at least one
// entry of `$folders`.
//
// `$file` is cloned and `$folders` is copied with `to_vec()` only when the
// error is produced. At the call sites in this file `$file` is a `PathBuf`,
// so `starts_with` is the path-aware prefix check.
macro_rules! check_file_in_folders {
($file:expr, $folders:expr, $error:ident) => {
if !$folders.iter().any(|folder| $file.starts_with(folder)) {
return Err($error {
file: $file.clone(),
folders: $folders.to_vec(),
});
}
};
}
/// Matches exec calls against a `Policy` and checks the file arguments of a
/// matched call against lists of readable and writeable folders.
pub struct ExecvChecker {
/// The policy that `r#match` delegates to.
execv_policy: Policy,
}
impl ExecvChecker {
pub fn new(execv_policy: Policy) -> Self {
Self { execv_policy }
}
pub fn r#match(&self, exec_call: &ExecCall) -> Result<MatchedExec> {
self.execv_policy.check(exec_call)
}
/// The caller is responsible for ensuring readable_folders and
/// writeable_folders are in canonical form.
pub fn check(
&self,
valid_exec: ValidExec,
cwd: &Option<OsString>,
readable_folders: &[PathBuf],
writeable_folders: &[PathBuf],
) -> Result<String> {
for (arg_type, value) in valid_exec
.args
.into_iter()
.map(|arg| (arg.r#type, arg.value))
.chain(
valid_exec
.opts
.into_iter()
.map(|opt| (opt.r#type, opt.value)),
)
{
match arg_type {
ArgType::ReadableFile => {
let readable_file = ensure_absolute_path(&value, cwd)?;
check_file_in_folders!(
readable_file,
readable_folders,
ReadablePathNotInReadableFolders
);
}
ArgType::WriteableFile => {
let writeable_file = ensure_absolute_path(&value, cwd)?;
check_file_in_folders!(
writeable_file,
writeable_folders,
WriteablePathNotInWriteableFolders
);
}
ArgType::OpaqueNonFile
| ArgType::Unknown
| ArgType::PositiveInteger
| ArgType::SedCommand
| ArgType::Literal(_) => {
continue;
}
}
}
let mut program = valid_exec.program.to_string();
for system_path in valid_exec.system_path {
if is_executable_file(&system_path) {
program = system_path.to_string();
break;
}
}
Ok(program)
}
}
fn ensure_absolute_path(path: &str, cwd: &Option<OsString>) -> Result<PathBuf> {
let file = PathBuf::from(path);
let result = if file.is_relative() {
match cwd {
Some(cwd) => file.absolutize_from(cwd),
None => return Err(CannotCheckRelativePath { file }),
}
} else {
file.absolutize()
};
result
.map(|path| path.into_owned())
.map_err(|error| CannotCanonicalizePath {
file: path.to_string(),
error: error.kind(),
})
}
/// Returns `true` when `path` refers to a regular file that has at least one
/// execute permission bit set. Returns `false` when the metadata for `path`
/// cannot be read.
fn is_executable_file(path: &str) -> bool {
    match std::fs::metadata(Path::new(path)) {
        // 0o111 covers the execute bits for owner, group and others.
        Ok(metadata) => metadata.is_file() && (metadata.permissions().mode() & 0o111) != 0,
        Err(_) => false,
    }
}
// Unit tests for `ExecvChecker::check` using a fake `cp` executable created in
// a temporary directory.
#[cfg(test)]
mod tests {
use tempfile::TempDir;
use super::*;
use crate::MatchedArg;
use crate::PolicyParser;
// Builds a checker whose policy defines `cp` with one readable and one
// writeable file argument, and with `fake_cp` as its only system_path entry.
fn setup(fake_cp: &Path) -> ExecvChecker {
let source = format!(
r#"
define_program(
program="cp",
args=[ARG_RFILE, ARG_WFILE],
system_path=[{fake_cp:?}]
)
"#
);
let parser = PolicyParser::new("#test", &source);
let policy = parser.parse().unwrap();
ExecvChecker::new(policy)
}
// Exercises `check` with different combinations of readable and writeable
// folders for the same `cp source dest` call.
#[test]
fn test_check_valid_input_files() -> Result<()> {
let temp_dir = TempDir::new().unwrap();
// Create an executable file that can be used with the system_path arg.
let fake_cp = temp_dir.path().join("cp");
let fake_cp_file = std::fs::File::create(&fake_cp).unwrap();
let mut permissions = fake_cp_file.metadata().unwrap().permissions();
permissions.set_mode(0o755);
std::fs::set_permissions(&fake_cp, permissions).unwrap();
// Create root_path and reference to files under the root.
let root_path = temp_dir.path().to_path_buf();
let source_path = root_path.join("source");
let dest_path = root_path.join("dest");
let cp = fake_cp.to_str().unwrap().to_string();
let root = root_path.to_str().unwrap().to_string();
let source = source_path.to_str().unwrap().to_string();
let dest = dest_path.to_str().unwrap().to_string();
let cwd = Some(root_path.clone().into());
let checker = setup(&fake_cp);
let exec_call = ExecCall {
program: "cp".into(),
args: vec![source.clone(), dest.clone()],
};
let valid_exec = match checker.r#match(&exec_call)? {
MatchedExec::Match { exec } => exec,
unexpected => panic!("Expected a safe exec but got {unexpected:?}"),
};
// No readable or writeable folders specified.
assert_eq!(
checker.check(valid_exec.clone(), &cwd, &[], &[]),
Err(ReadablePathNotInReadableFolders {
file: source_path.clone(),
folders: vec![]
}),
);
// Only readable folders specified.
assert_eq!(
checker.check(valid_exec.clone(), &cwd, &[root_path.clone()], &[]),
Err(WriteablePathNotInWriteableFolders {
file: dest_path.clone(),
folders: vec![]
}),
);
// Both readable and writeable folders specified.
// The returned program is the fake `cp` from system_path.
assert_eq!(
checker.check(
valid_exec.clone(),
&cwd,
&[root_path.clone()],
&[root_path.clone()]
),
Ok(cp.clone()),
);
// Args are the readable and writeable folders, not files within the
// folders.
let exec_call_folders_as_args = ExecCall {
program: "cp".into(),
args: vec![root.clone(), root.clone()],
};
let valid_exec_call_folders_as_args = match checker.r#match(&exec_call_folders_as_args)? {
MatchedExec::Match { exec } => exec,
_ => panic!("Expected a safe exec"),
};
assert_eq!(
checker.check(
valid_exec_call_folders_as_args,
&cwd,
&[root_path.clone()],
&[root_path.clone()]
),
Ok(cp.clone()),
);
// Specify a parent of a readable folder as input.
let exec_with_parent_of_readable_folder = ValidExec {
program: "cp".into(),
args: vec![
MatchedArg::new(
0,
ArgType::ReadableFile,
root_path.parent().unwrap().to_str().unwrap(),
)?,
MatchedArg::new(1, ArgType::WriteableFile, &dest)?,
],
..Default::default()
};
assert_eq!(
checker.check(
exec_with_parent_of_readable_folder,
&cwd,
&[root_path.clone()],
&[dest_path.clone()]
),
Err(ReadablePathNotInReadableFolders {
file: root_path.parent().unwrap().to_path_buf(),
folders: vec![root_path.clone()]
}),
);
Ok(())
}
}

View File

@@ -0,0 +1,45 @@
#![allow(clippy::type_complexity)]
#![allow(clippy::too_many_arguments)]
#[macro_use]
extern crate starlark;
mod arg_matcher;
mod arg_resolver;
mod arg_type;
mod error;
mod exec_call;
mod execv_checker;
mod opt;
mod policy;
mod policy_parser;
mod program;
mod sed_command;
mod valid_exec;
pub use arg_matcher::ArgMatcher;
pub use arg_resolver::PositionalArg;
pub use arg_type::ArgType;
pub use error::Error;
pub use error::Result;
pub use exec_call::ExecCall;
pub use execv_checker::ExecvChecker;
pub use opt::Opt;
pub use policy::Policy;
pub use policy_parser::PolicyParser;
pub use program::Forbidden;
pub use program::MatchedExec;
pub use program::NegativeExamplePassedCheck;
pub use program::PositiveExampleFailedCheck;
pub use program::ProgramSpec;
pub use sed_command::parse_sed_command;
pub use valid_exec::MatchedArg;
pub use valid_exec::MatchedFlag;
pub use valid_exec::MatchedOpt;
pub use valid_exec::ValidExec;
/// Source of the default policy, embedded at compile time from
/// `default.policy`.
const DEFAULT_POLICY: &str = include_str!("default.policy");

/// Parses the embedded default policy, identified as `#default` to the parser.
pub fn get_default_policy() -> starlark::Result<Policy> {
    PolicyParser::new("#default", DEFAULT_POLICY).parse()
}

View File

@@ -0,0 +1,166 @@
use anyhow::Result;
use clap::Parser;
use clap::Subcommand;
use codex_execpolicy::get_default_policy;
use codex_execpolicy::ExecCall;
use codex_execpolicy::MatchedExec;
use codex_execpolicy::Policy;
use codex_execpolicy::PolicyParser;
use codex_execpolicy::ValidExec;
use serde::de;
use serde::Deserialize;
use serde::Serialize;
use std::path::PathBuf;
use std::str::FromStr;
// Exit codes reported by `check_command` when `--require-safe` is set; without
// that flag the exit code is always 0.

/// The command matched a policy rule but might write files.
const MATCHED_BUT_WRITES_FILES_EXIT_CODE: i32 = 12;
/// The policy check returned an error, so the command could not be verified.
const MIGHT_BE_SAFE_EXIT_CODE: i32 = 13;
/// The policy forbids the command.
const FORBIDDEN_EXIT_CODE: i32 = 14;
// Command-line arguments, parsed in `main` with `Args::parse()`.
//
// NOTE(review): clap renders the `///` comments on the fields below as help
// text, so they are user-facing. The help for `require_safe` mentions only
// exit code 13, while `check_command` can also produce 12 and 14 when the
// flag is set - consider updating the help text.
#[derive(Parser, Deserialize, Debug)]
#[command(version, about, long_about = None)]
pub struct Args {
/// If the command fails the policy, exit with 13, but print parseable JSON
/// to stdout.
#[clap(long)]
pub require_safe: bool,
/// Path to the policy file.
// When absent, `main` falls back to `get_default_policy()`.
#[clap(long, short = 'p')]
pub policy: Option<PathBuf>,
#[command(subcommand)]
pub command: Command,
}
// Subcommands of the CLI. Both describe the command to check; they differ only
// in how it is supplied. (The `///` comments below double as clap help text.)
#[derive(Clone, Debug, Deserialize, Subcommand)]
pub enum Command {
/// Checks the command as if the arguments were the inputs to execv(3).
Check {
// The first element is the program; the rest are its arguments (see `main`).
#[arg(trailing_var_arg = true)]
command: Vec<String>,
},
/// Checks the command encoded as a JSON object.
#[clap(name = "check-json")]
CheckJson {
/// JSON object with "program" (str) and "args" (list[str]) fields.
#[serde(deserialize_with = "deserialize_from_json")]
exec: ExecArg,
},
}
/// The command to check: a program and its arguments.
///
/// Can be deserialized from JSON; `args` defaults to an empty list when the
/// field is missing.
#[derive(Clone, Debug, Deserialize)]
pub struct ExecArg {
pub program: String,
#[serde(default)]
pub args: Vec<String>,
}
fn main() -> Result<()> {
env_logger::init();
let args = Args::parse();
let policy = match args.policy {
Some(policy) => {
let policy_source = policy.to_string_lossy().to_string();
let unparsed_policy = std::fs::read_to_string(policy)?;
let parser = PolicyParser::new(&policy_source, &unparsed_policy);
parser.parse()
}
None => get_default_policy(),
};
let policy = policy.map_err(|err| err.into_anyhow())?;
let exec = match args.command {
Command::Check { command } => match command.split_first() {
Some((first, rest)) => ExecArg {
program: first.to_string(),
args: rest.iter().map(|s| s.to_string()).collect(),
},
None => {
eprintln!("no command provided");
std::process::exit(1);
}
},
Command::CheckJson { exec } => exec,
};
let (output, exit_code) = check_command(&policy, exec, args.require_safe);
let json = serde_json::to_string(&output)?;
println!("{}", json);
std::process::exit(exit_code);
}
fn check_command(
policy: &Policy,
ExecArg { program, args }: ExecArg,
check: bool,
) -> (Output, i32) {
let exec_call = ExecCall { program, args };
match policy.check(&exec_call) {
Ok(MatchedExec::Match { exec }) => {
if exec.might_write_files() {
let exit_code = if check {
MATCHED_BUT_WRITES_FILES_EXIT_CODE
} else {
0
};
(Output::Match { r#match: exec }, exit_code)
} else {
(Output::Safe { r#match: exec }, 0)
}
}
Ok(MatchedExec::Forbidden { reason, cause }) => {
let exit_code = if check { FORBIDDEN_EXIT_CODE } else { 0 };
(Output::Forbidden { reason, cause }, exit_code)
}
Err(err) => {
let exit_code = if check { MIGHT_BE_SAFE_EXIT_CODE } else { 0 };
(Output::Unverified { error: err }, exit_code)
}
}
}
/// Outcome of checking a command, printed by `main` as JSON.
///
/// Serialized as an internally tagged enum: the (renamed, lowercase) variant
/// name is stored under the `"result"` key.
#[derive(Debug, Serialize)]
#[serde(tag = "result")]
pub enum Output {
/// The command is verified as safe.
#[serde(rename = "safe")]
Safe { r#match: ValidExec },
/// The command has matched a rule in the policy, but the caller should
/// decide whether it is "safe" given the files it wants to write.
#[serde(rename = "match")]
Match { r#match: ValidExec },
/// The user is forbidden from running the command.
#[serde(rename = "forbidden")]
Forbidden {
reason: String,
cause: codex_execpolicy::Forbidden,
},
/// The safety of the command could not be verified.
#[serde(rename = "unverified")]
Unverified { error: codex_execpolicy::Error },
}
fn deserialize_from_json<'de, D>(deserializer: D) -> Result<ExecArg, D::Error>
where
D: de::Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;
let decoded = serde_json::from_str(&s)
.map_err(|e| serde::de::Error::custom(format!("JSON parse error: {e}")))?;
Ok(decoded)
}
impl FromStr for ExecArg {
type Err = anyhow::Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
serde_json::from_str(s).map_err(|e| e.into())
}
}

View File

@@ -0,0 +1,77 @@
#![allow(clippy::needless_lifetimes)]
use crate::starlark::values::ValueLike;
use crate::ArgType;
use allocative::Allocative;
use derive_more::derive::Display;
use starlark::any::ProvidesStaticType;
use starlark::values::starlark_value;
use starlark::values::AllocValue;
use starlark::values::Heap;
use starlark::values::NoSerialize;
use starlark::values::StarlarkValue;
use starlark::values::UnpackValue;
use starlark::values::Value;
/// Command line option that takes a value.
// Displayed as `opt(<opt>)`, per the `display` attribute below.
#[derive(Clone, Debug, Display, PartialEq, Eq, ProvidesStaticType, NoSerialize, Allocative)]
#[display("opt({})", opt)]
pub struct Opt {
/// The option as typed on the command line, e.g., `-h` or `--help`. If
/// it can be used in the `--name=value` format, then this should be
/// `--name` (though this is subject to change).
pub opt: String,
/// Whether the option is a plain flag or takes a value, and of which type.
pub meta: OptMeta,
// NOTE(review): presumably marks the option as mandatory for a match (see
// `Error::MissingRequiredOptions`) - confirm where this field is consumed.
pub required: bool,
}
/// When defining an Opt, use as specific an OptMeta as possible.
#[derive(Clone, Debug, Display, PartialEq, Eq, ProvidesStaticType, NoSerialize, Allocative)]
// NOTE(review): this formats `self` from inside its own `Display` impl; confirm
// that derive_more does not expand it into a self-recursive call.
#[display("{}", self)]
pub enum OptMeta {
/// Option does not take a value.
Flag,
/// Option takes a single value matching the specified type.
Value(ArgType),
}
impl Opt {
pub fn new(opt: String, meta: OptMeta, required: bool) -> Self {
Self {
opt,
meta,
required,
}
}
pub fn name(&self) -> &str {
&self.opt
}
}
// Registers `Opt` as a Starlark value whose type name is "Opt".
#[starlark_value(type = "Opt")]
impl<'v> StarlarkValue<'v> for Opt {
type Canonical = Opt;
}
// Lets a Starlark value be unpacked into an owned `Opt`.
impl<'v> UnpackValue<'v> for Opt {
type Error = starlark::Error;
/// Downcasts `value` to `Opt` and clones it; yields `Ok(None)` when `value`
/// is not an `Opt`.
fn unpack_value_impl(value: Value<'v>) -> starlark::Result<Option<Self>> {
// TODO(mbolin): It feels like this should be doable without cloning?
// Cannot simply consume the value?
Ok(value.downcast_ref::<Opt>().cloned())
}
}
// Allows an `Opt` to be turned into a Starlark `Value`.
impl<'v> AllocValue<'v> for Opt {
/// Allocates this option on `heap` with `Heap::alloc_simple` and returns the
/// resulting value.
fn alloc_value(self, heap: &'v Heap) -> Value<'v> {
heap.alloc_simple(self)
}
}
// Registers `OptMeta` as a Starlark value whose type name is "OptMeta".
#[starlark_value(type = "OptMeta")]
impl<'v> StarlarkValue<'v> for OptMeta {
type Canonical = OptMeta;
}

View File

@@ -0,0 +1,103 @@
use multimap::MultiMap;
use regex::Error as RegexError;
use regex::Regex;
use crate::error::Error;
use crate::error::Result;
use crate::policy_parser::ForbiddenProgramRegex;
use crate::program::PositiveExampleFailedCheck;
use crate::ExecCall;
use crate::Forbidden;
use crate::MatchedExec;
use crate::NegativeExamplePassedCheck;
use crate::ProgramSpec;
/// A set of program specs plus rules for forbidden programs and arguments,
/// against which exec calls are checked (see `Policy::check`).
pub struct Policy {
/// Specs keyed by program name; a program may have several specs.
programs: MultiMap<String, ProgramSpec>,
/// A program matching any of these regexes is forbidden, with the reason
/// attached to the regex.
forbidden_program_regexes: Vec<ForbiddenProgramRegex>,
/// Matches any argument containing one of the forbidden substrings; `None`
/// when no forbidden substrings were configured.
forbidden_substrings_pattern: Option<Regex>,
}
impl Policy {
pub fn new(
programs: MultiMap<String, ProgramSpec>,
forbidden_program_regexes: Vec<ForbiddenProgramRegex>,
forbidden_substrings: Vec<String>,
) -> std::result::Result<Self, RegexError> {
let forbidden_substrings_pattern = if forbidden_substrings.is_empty() {
None
} else {
let escaped_substrings = forbidden_substrings
.iter()
.map(|s| regex::escape(s))
.collect::<Vec<_>>()
.join("|");
Some(Regex::new(&format!("({escaped_substrings})"))?)
};
Ok(Self {
programs,
forbidden_program_regexes,
forbidden_substrings_pattern,
})
}
pub fn check(&self, exec_call: &ExecCall) -> Result<MatchedExec> {
let ExecCall { program, args } = &exec_call;
for ForbiddenProgramRegex { regex, reason } in &self.forbidden_program_regexes {
if regex.is_match(program) {
return Ok(MatchedExec::Forbidden {
cause: Forbidden::Program {
program: program.clone(),
exec_call: exec_call.clone(),
},
reason: reason.clone(),
});
}
}
for arg in args {
if let Some(regex) = &self.forbidden_substrings_pattern {
if regex.is_match(arg) {
return Ok(MatchedExec::Forbidden {
cause: Forbidden::Arg {
arg: arg.clone(),
exec_call: exec_call.clone(),
},
reason: format!("arg `{}` contains forbidden substring", arg),
});
}
}
}
let mut last_err = Err(Error::NoSpecForProgram {
program: program.clone(),
});
if let Some(spec_list) = self.programs.get_vec(program) {
for spec in spec_list {
match spec.check(exec_call) {
Ok(matched_exec) => return Ok(matched_exec),
Err(err) => {
last_err = Err(err);
}
}
}
}
last_err
}
/// Runs every spec's positive examples through that spec and collects the
/// examples that the spec failed to accept.
pub fn check_each_good_list_individually(&self) -> Vec<PositiveExampleFailedCheck> {
    self.programs
        .flat_iter()
        .flat_map(|(_program, spec)| spec.verify_should_match_list())
        .collect()
}
/// Runs every spec's negative examples through that spec and collects the
/// examples for which the spec's check unexpectedly succeeded.
pub fn check_each_bad_list_individually(&self) -> Vec<NegativeExamplePassedCheck> {
    self.programs
        .flat_iter()
        .flat_map(|(_program, spec)| spec.verify_should_not_match_list())
        .collect()
}
}

View File

@@ -0,0 +1,222 @@
#![allow(clippy::needless_lifetimes)]
use crate::arg_matcher::ArgMatcher;
use crate::opt::OptMeta;
use crate::Opt;
use crate::Policy;
use crate::ProgramSpec;
use log::info;
use multimap::MultiMap;
use regex::Regex;
use starlark::any::ProvidesStaticType;
use starlark::environment::GlobalsBuilder;
use starlark::environment::LibraryExtension;
use starlark::environment::Module;
use starlark::eval::Evaluator;
use starlark::syntax::AstModule;
use starlark::syntax::Dialect;
use starlark::values::list::UnpackList;
use starlark::values::none::NoneType;
use starlark::values::Heap;
use std::cell::RefCell;
use std::collections::HashMap;
/// Holds the text of a Starlark policy file until `parse` turns it into a
/// `Policy`.
pub struct PolicyParser {
/// Identifies where the policy came from; passed to `AstModule::parse` as
/// its first (file name) argument.
policy_source: String,
/// The Starlark source text of the policy; passed to `AstModule::parse` as
/// the content to parse.
unparsed_policy: String,
}
impl PolicyParser {
pub fn new(policy_source: &str, unparsed_policy: &str) -> Self {
Self {
policy_source: policy_source.to_string(),
unparsed_policy: unparsed_policy.to_string(),
}
}
pub fn parse(&self) -> starlark::Result<Policy> {
let mut dialect = Dialect::Extended.clone();
dialect.enable_f_strings = true;
let ast = AstModule::parse(&self.policy_source, self.unparsed_policy.clone(), &dialect)?;
let globals = GlobalsBuilder::extended_by(&[LibraryExtension::Typing])
.with(policy_builtins)
.build();
let module = Module::new();
let heap = Heap::new();
module.set("ARG_OPAQUE_VALUE", heap.alloc(ArgMatcher::OpaqueNonFile));
module.set("ARG_RFILE", heap.alloc(ArgMatcher::ReadableFile));
module.set("ARG_WFILE", heap.alloc(ArgMatcher::WriteableFile));
module.set("ARG_RFILES", heap.alloc(ArgMatcher::ReadableFiles));
module.set(
"ARG_RFILES_OR_CWD",
heap.alloc(ArgMatcher::ReadableFilesOrCwd),
);
module.set("ARG_POS_INT", heap.alloc(ArgMatcher::PositiveInteger));
module.set("ARG_SED_COMMAND", heap.alloc(ArgMatcher::SedCommand));
module.set(
"ARG_UNVERIFIED_VARARGS",
heap.alloc(ArgMatcher::UnverifiedVarargs),
);
let policy_builder = PolicyBuilder::new();
{
let mut eval = Evaluator::new(&module);
eval.extra = Some(&policy_builder);
eval.eval_module(ast, &globals)?;
}
let policy = policy_builder.build();
policy.map_err(|e| starlark::Error::new_kind(starlark::ErrorKind::Other(e.into())))
}
}
/// A deny rule on program names: any program whose name matches `regex` is
/// reported as forbidden with `reason`.
#[derive(Debug)]
pub struct ForbiddenProgramRegex {
/// Pattern tested against the program name of an exec call.
pub regex: regex::Regex,
/// Explanation returned alongside the forbidden result.
pub reason: String,
}
/// Accumulates what a policy file declares while it is being evaluated.
///
/// The builtins only receive a shared reference to the builder (through
/// `eval.extra`), so each collection sits in a `RefCell` to allow mutation
/// through `&self`.
#[derive(Debug, ProvidesStaticType)]
struct PolicyBuilder {
/// Program specs registered so far, keyed by program name.
programs: RefCell<MultiMap<String, ProgramSpec>>,
/// Program-name deny rules registered so far.
forbidden_program_regexes: RefCell<Vec<ForbiddenProgramRegex>>,
/// Forbidden argument substrings registered so far.
forbidden_substrings: RefCell<Vec<String>>,
}
impl PolicyBuilder {
fn new() -> Self {
Self {
programs: RefCell::new(MultiMap::new()),
forbidden_program_regexes: RefCell::new(Vec::new()),
forbidden_substrings: RefCell::new(Vec::new()),
}
}
fn build(self) -> Result<Policy, regex::Error> {
let programs = self.programs.into_inner();
let forbidden_program_regexes = self.forbidden_program_regexes.into_inner();
let forbidden_substrings = self.forbidden_substrings.into_inner();
Policy::new(programs, forbidden_program_regexes, forbidden_substrings)
}
fn add_program_spec(&self, program_spec: ProgramSpec) {
info!("adding program spec: {:?}", program_spec);
let name = program_spec.program.clone();
let mut programs = self.programs.borrow_mut();
programs.insert(name.clone(), program_spec);
}
fn add_forbidden_substrings(&self, substrings: &[String]) {
let mut forbidden_substrings = self.forbidden_substrings.borrow_mut();
forbidden_substrings.extend_from_slice(substrings);
}
fn add_forbidden_program_regex(&self, regex: Regex, reason: String) {
let mut forbidden_program_regexes = self.forbidden_program_regexes.borrow_mut();
forbidden_program_regexes.push(ForbiddenProgramRegex { regex, reason });
}
}
// Builtin functions available to policy files. Each builtin that records
// something reaches the `PolicyBuilder` through `eval.extra`, which
// `PolicyParser::parse` sets before evaluating the module.
#[starlark_module]
fn policy_builtins(builder: &mut GlobalsBuilder) {
    // Declares a spec for `program`. Omitted optional arguments default to
    // `false` / empty. Fails with "duplicate flag" when two entries in
    // `options` share the same name.
    fn define_program<'v>(
        program: String,
        system_path: Option<UnpackList<String>>,
        option_bundling: Option<bool>,
        combined_format: Option<bool>,
        options: Option<UnpackList<Opt>>,
        args: Option<UnpackList<ArgMatcher>>,
        forbidden: Option<String>,
        should_match: Option<UnpackList<UnpackList<String>>>,
        should_not_match: Option<UnpackList<UnpackList<String>>>,
        eval: &mut Evaluator,
    ) -> anyhow::Result<NoneType> {
        let option_bundling = option_bundling.unwrap_or(false);
        let combined_format = combined_format.unwrap_or(false);

        // The unpacked lists are already owned, so move their vectors out
        // instead of copying them.
        let system_path = system_path.map(|list| list.items).unwrap_or_default();
        let options = options.map(|list| list.items).unwrap_or_default();
        let args = args.map(|list| list.items).unwrap_or_default();
        let unpack_examples =
            |examples: Option<UnpackList<UnpackList<String>>>| -> Vec<Vec<String>> {
                examples
                    .map(|outer| outer.items.into_iter().map(|inner| inner.items).collect())
                    .unwrap_or_default()
            };
        let should_match = unpack_examples(should_match);
        let should_not_match = unpack_examples(should_not_match);

        // Index the options by name, rejecting duplicates. The entry API
        // needs the name to be allocated only once.
        let mut allowed_options = HashMap::<String, Opt>::new();
        for opt in options {
            match allowed_options.entry(opt.name().to_string()) {
                std::collections::hash_map::Entry::Occupied(existing) => {
                    let name = existing.key();
                    return Err(anyhow::format_err!("duplicate flag: {name}"));
                }
                std::collections::hash_map::Entry::Vacant(slot) => {
                    slot.insert(opt);
                }
            }
        }

        let program_spec = ProgramSpec::new(
            program,
            system_path,
            option_bundling,
            combined_format,
            allowed_options,
            args,
            forbidden,
            should_match,
            should_not_match,
        );

        // `PolicyParser::parse` always installs a `PolicyBuilder` in
        // `eval.extra`, so both unwraps hold for policies evaluated through it.
        let policy_builder = eval
            .extra
            .as_ref()
            .unwrap()
            .downcast_ref::<PolicyBuilder>()
            .unwrap();
        policy_builder.add_program_spec(program_spec);
        Ok(NoneType)
    }

    // Adds `strings` to the substrings that no argument may contain.
    fn forbid_substrings(
        strings: UnpackList<String>,
        eval: &mut Evaluator,
    ) -> anyhow::Result<NoneType> {
        let policy_builder = eval
            .extra
            .as_ref()
            .unwrap()
            .downcast_ref::<PolicyBuilder>()
            .unwrap();
        // Borrow the unpacked vector directly; no copy is needed.
        policy_builder.add_forbidden_substrings(&strings.items);
        Ok(NoneType)
    }

    // Forbids every program whose name matches `regex`, recording `reason`.
    // Fails if `regex` is not a valid regular expression.
    fn forbid_program_regex(
        regex: String,
        reason: String,
        eval: &mut Evaluator,
    ) -> anyhow::Result<NoneType> {
        let policy_builder = eval
            .extra
            .as_ref()
            .unwrap()
            .downcast_ref::<PolicyBuilder>()
            .unwrap();
        let compiled_regex = regex::Regex::new(&regex)?;
        policy_builder.add_forbidden_program_regex(compiled_regex, reason);
        Ok(NoneType)
    }

    // Creates an option that takes a value; the value's type is derived from
    // the given matcher. `required` defaults to false.
    fn opt(name: String, r#type: ArgMatcher, required: Option<bool>) -> anyhow::Result<Opt> {
        Ok(Opt::new(
            name,
            OptMeta::Value(r#type.arg_type()),
            required.unwrap_or(false),
        ))
    }

    // Creates an option that takes no value; flags are never required.
    fn flag(name: String) -> anyhow::Result<Opt> {
        Ok(Opt::new(name, OptMeta::Flag, false))
    }
}

View File

@@ -0,0 +1,247 @@
use serde::Serialize;
use std::collections::HashMap;
use std::collections::HashSet;
use crate::arg_matcher::ArgMatcher;
use crate::arg_resolver::resolve_observed_args_with_patterns;
use crate::arg_resolver::PositionalArg;
use crate::error::Error;
use crate::error::Result;
use crate::opt::Opt;
use crate::opt::OptMeta;
use crate::valid_exec::MatchedFlag;
use crate::valid_exec::MatchedOpt;
use crate::valid_exec::ValidExec;
use crate::ArgType;
use crate::ExecCall;
/// Describes the options and positional arguments that one program accepts,
/// together with example invocations used to sanity-check the spec.
#[derive(Debug)]
pub struct ProgramSpec {
/// Name of the program this spec applies to.
pub program: String,
/// Copied verbatim into `ValidExec::system_path` when a call matches.
pub system_path: Vec<String>,
/// Stored from the policy file; not consulted by `check` as written here.
pub option_bundling: bool,
/// Stored from the policy file; not consulted by `check` as written here.
pub combined_format: bool,
/// Options the program accepts, keyed by the option as typed (e.g. `-n`).
pub allowed_options: HashMap<String, Opt>,
/// Patterns that the positional arguments are resolved against.
pub arg_patterns: Vec<ArgMatcher>,
/// When set, a call that matches this spec is reported as forbidden with
/// this reason instead of as a match.
forbidden: Option<String>,
/// Names of the entries in `allowed_options` that are marked required.
required_options: HashSet<String>,
/// Argument lists that `check` is expected to accept.
should_match: Vec<Vec<String>>,
/// Argument lists that `check` is expected to reject.
should_not_match: Vec<Vec<String>>,
}
impl ProgramSpec {
pub fn new(
program: String,
system_path: Vec<String>,
option_bundling: bool,
combined_format: bool,
allowed_options: HashMap<String, Opt>,
arg_patterns: Vec<ArgMatcher>,
forbidden: Option<String>,
should_match: Vec<Vec<String>>,
should_not_match: Vec<Vec<String>>,
) -> Self {
let required_options = allowed_options
.iter()
.filter_map(|(name, opt)| {
if opt.required {
Some(name.clone())
} else {
None
}
})
.collect();
Self {
program,
system_path,
option_bundling,
combined_format,
allowed_options,
arg_patterns,
forbidden,
required_options,
should_match,
should_not_match,
}
}
}
/// Result of a check that was able to classify an exec call.
#[derive(Clone, Debug, Eq, PartialEq, Serialize)]
pub enum MatchedExec {
/// The call matched a program spec; `exec` is its parsed form.
Match { exec: ValidExec },
/// The call is forbidden; `cause` says what triggered the rule and
/// `reason` carries the explanation.
Forbidden { cause: Forbidden, reason: String },
}
/// What caused an exec call to be classified as forbidden.
#[derive(Clone, Debug, Eq, PartialEq, Serialize)]
pub enum Forbidden {
/// The program name matched a forbidden-program regex of the policy.
Program {
program: String,
exec_call: ExecCall,
},
/// The argument `arg` matched the policy's forbidden-substring pattern.
Arg {
arg: String,
exec_call: ExecCall,
},
/// The call matched a program spec that carries a `forbidden` reason.
Exec {
exec: ValidExec,
},
}
impl ProgramSpec {
// TODO(mbolin): The idea is that there should be a set of rules defined for
// a program and the args should be checked against the rules to determine
// if the program should be allowed to run.
/// Parses the arguments of `exec_call` against this spec.
///
/// Arguments are scanned left to right: an argument starting with `-` must
/// be a known option; an option that takes a value consumes the next
/// argument; everything else is positional and is later resolved against
/// `arg_patterns`.
///
/// Returns `MatchedExec::Forbidden` when the call parses but the spec has a
/// `forbidden` reason, and `MatchedExec::Match` otherwise.
///
/// # Errors
///
/// Fails when an option value is missing or is itself an option, on `--`,
/// on an unknown option, when value validation or positional resolution
/// fails, or when a required option is absent.
pub fn check(&self, exec_call: &ExecCall) -> Result<MatchedExec> {
    // Set while the previous argument was an option still awaiting its value.
    let mut pending_value: Option<(String, ArgType)> = None;
    let mut positional = Vec::<PositionalArg>::new();
    let mut matched_flags = Vec::<MatchedFlag>::new();
    let mut matched_opts = Vec::<MatchedOpt>::new();

    for (index, arg) in exec_call.args.iter().enumerate() {
        if let Some((name, arg_type)) = pending_value.take() {
            // This argument is the value for the preceding option, so it
            // had better not be another option!
            if arg.starts_with("-") {
                return Err(Error::OptionFollowedByOptionInsteadOfValue {
                    program: self.program.clone(),
                    option: name,
                    value: arg.clone(),
                });
            }
            matched_opts.push(MatchedOpt::new(&name, arg, arg_type)?);
            continue;
        }

        if arg == "--" {
            return Err(Error::DoubleDashNotSupportedYet {
                program: self.program.clone(),
            });
        }

        if !arg.starts_with("-") {
            positional.push(PositionalArg {
                index,
                value: arg.clone(),
            });
            continue;
        }

        // An unrecognized option could be an --option=value style flag, but
        // that form is not handled, so it is reported as unknown.
        let Some(opt) = self.allowed_options.get(arg) else {
            return Err(Error::UnknownOption {
                program: self.program.clone(),
                option: arg.clone(),
            });
        };
        match &opt.meta {
            // A flag does not expect an argument.
            OptMeta::Flag => matched_flags.push(MatchedFlag { name: arg.clone() }),
            OptMeta::Value(arg_type) => {
                pending_value = Some((arg.clone(), arg_type.clone()));
            }
        }
    }

    // The last argument was an option that never received its value.
    if let Some((name, _arg_type)) = pending_value {
        return Err(Error::OptionMissingValue {
            program: self.program.clone(),
            option: name,
        });
    }

    let matched_args =
        resolve_observed_args_with_patterns(&self.program, positional, &self.arg_patterns)?;

    // Verify all required options are present.
    let matched_opt_names: HashSet<String> = matched_opts
        .iter()
        .map(|opt| opt.name().to_string())
        .collect();
    let mut missing: Vec<String> = self
        .required_options
        .difference(&matched_opt_names)
        .cloned()
        .collect();
    if !missing.is_empty() {
        missing.sort();
        return Err(Error::MissingRequiredOptions {
            program: self.program.clone(),
            options: missing,
        });
    }

    let exec = ValidExec {
        program: self.program.clone(),
        flags: matched_flags,
        opts: matched_opts,
        args: matched_args,
        system_path: self.system_path.clone(),
    };
    Ok(match &self.forbidden {
        Some(reason) => MatchedExec::Forbidden {
            cause: Forbidden::Exec { exec },
            reason: reason.clone(),
        },
        None => MatchedExec::Match { exec },
    })
}
pub fn verify_should_match_list(&self) -> Vec<PositiveExampleFailedCheck> {
let mut violations = Vec::new();
for good in &self.should_match {
let exec_call = ExecCall {
program: self.program.clone(),
args: good.clone(),
};
match self.check(&exec_call) {
Ok(_) => {}
Err(error) => {
violations.push(PositiveExampleFailedCheck {
program: self.program.clone(),
args: good.clone(),
error,
});
}
}
}
violations
}
/// Checks every `should_not_match` example against this spec and returns one
/// entry for each example for which `check` returned `Ok`.
///
/// NOTE(review): `check` returns `Ok(MatchedExec::Forbidden { .. })` for a
/// spec with a `forbidden` reason, so such examples are counted as passing
/// here; confirm that this is intended.
pub fn verify_should_not_match_list(&self) -> Vec<NegativeExamplePassedCheck> {
    self.should_not_match
        .iter()
        .filter(|bad| {
            let exec_call = ExecCall {
                program: self.program.clone(),
                args: bad.to_vec(),
            };
            self.check(&exec_call).is_ok()
        })
        .map(|bad| NegativeExamplePassedCheck {
            program: self.program.clone(),
            args: bad.clone(),
        })
        .collect()
}
}
/// A `should_match` example that its program spec rejected.
#[derive(Debug, Eq, PartialEq)]
pub struct PositiveExampleFailedCheck {
/// Program whose spec was exercised.
pub program: String,
/// Arguments of the example that should have been accepted.
pub args: Vec<String>,
/// Error returned by `ProgramSpec::check` for the example.
pub error: Error,
}
/// A `should_not_match` example for which `ProgramSpec::check` returned `Ok`.
#[derive(Debug, Eq, PartialEq)]
pub struct NegativeExamplePassedCheck {
/// Program whose spec was exercised.
pub program: String,
/// Arguments of the example that should have been rejected.
pub args: Vec<String>,
}

View File

@@ -0,0 +1,17 @@
use crate::error::Error;
use crate::error::Result;
/// Accepts a sed command only if it is provably a plain line-range print.
///
/// For now the only accepted form is `<first>,<last>p`, e.g. `122,202p`,
/// where both line numbers consist solely of ASCII digits and fit in a
/// `u64`.
///
/// # Errors
///
/// Returns `Error::SedCommandNotProvablySafe` carrying the original command
/// for anything else.
pub fn parse_sed_command(sed_command: &str) -> Result<()> {
    /// A line number is one or more ASCII digits that fit in a `u64`.
    ///
    /// The explicit digit check is required because `str::parse::<u64>`
    /// also accepts a leading `+`, which would let `+1,+2p` through.
    fn is_line_number(text: &str) -> bool {
        !text.is_empty()
            && text.bytes().all(|byte| byte.is_ascii_digit())
            && text.parse::<u64>().is_ok()
    }

    let is_print_range = sed_command
        .strip_suffix('p')
        .and_then(|range| range.split_once(','))
        .is_some_and(|(first, last)| is_line_number(first) && is_line_number(last));

    if is_print_range {
        Ok(())
    } else {
        Err(Error::SedCommandNotProvablySafe {
            command: sed_command.to_string(),
        })
    }
}

View File

@@ -0,0 +1,95 @@
use crate::arg_type::ArgType;
use crate::error::Result;
use serde::Serialize;
/// exec() invocation that has been accepted by a `Policy`.
#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize)]
pub struct ValidExec {
/// Name of the program, taken from the matching program spec.
pub program: String,
/// Flags (options without a value) found in the invocation.
pub flags: Vec<MatchedFlag>,
/// Options found in the invocation together with their values.
pub opts: Vec<MatchedOpt>,
/// Positional arguments after they were resolved against the spec's patterns.
pub args: Vec<MatchedArg>,
/// If non-empty, a prioritized list of paths to try instead of `program`.
/// For example, `/bin/ls` is harder to compromise than whatever `ls`
/// happens to be in the user's `$PATH`, so `/bin/ls` would be included for
/// `ls`. The caller is free to disregard this list and use `program`.
pub system_path: Vec<String>,
}
impl ValidExec {
pub fn new(program: &str, args: Vec<MatchedArg>, system_path: &[&str]) -> Self {
Self {
program: program.to_string(),
flags: vec![],
opts: vec![],
args,
system_path: system_path.iter().map(|&s| s.to_string()).collect(),
}
}
/// Whether a possible side effect of running this command includes writing
/// a file.
pub fn might_write_files(&self) -> bool {
self.opts.iter().any(|opt| opt.r#type.might_write_file())
|| self.args.iter().any(|opt| opt.r#type.might_write_file())
}
}
/// A positional argument together with the type it was matched as.
#[derive(Clone, Debug, Eq, PartialEq, Serialize)]
pub struct MatchedArg {
/// Index associated with the argument by whoever constructed it.
pub index: usize,
/// Type the value was validated against.
pub r#type: ArgType,
/// The argument as supplied.
pub value: String,
}
impl MatchedArg {
pub fn new(index: usize, r#type: ArgType, value: &str) -> Result<Self> {
r#type.validate(value)?;
Ok(Self {
index,
r#type,
value: value.to_string(),
})
}
}
/// A match for an option declared with opt() in a .policy file.
///
/// Instances created through `MatchedOpt::new` have had `value` validated
/// against `r#type`.
#[derive(Clone, Debug, Eq, PartialEq, Serialize)]
pub struct MatchedOpt {
/// Name of the option that was matched.
pub name: String,
/// Value supplied for the option.
pub value: String,
/// Type of the value supplied for the option.
pub r#type: ArgType,
}
impl MatchedOpt {
pub fn new(name: &str, value: &str, r#type: ArgType) -> Result<Self> {
r#type.validate(value)?;
Ok(Self {
name: name.to_string(),
value: value.to_string(),
r#type,
})
}
pub fn name(&self) -> &str {
&self.name
}
}
/// A match for an option that takes no value.
#[derive(Clone, Debug, Eq, PartialEq, Serialize)]
pub struct MatchedFlag {
/// Name of the flag that was matched.
pub name: String,
}
impl MatchedFlag {
pub fn new(name: &str) -> Self {
Self {
name: name.to_string(),
}
}
}