Phase 1: Repository & Infrastructure Setup
- Renamed directories: codex-rs -> llmx-rs, codex-cli -> llmx-cli
- Updated package.json files:
- Root: llmx-monorepo
- CLI: @llmx/llmx
- SDK: @llmx/llmx-sdk
- Updated pnpm workspace configuration
- Renamed binary: codex.js -> llmx.js
- Updated environment variables: CODEX_* -> LLMX_*
- Changed repository URLs to valknar/llmx
🤖 Generated with Claude Code
This commit is contained in:
118
llmx-rs/execpolicy/src/arg_matcher.rs
Normal file
118
llmx-rs/execpolicy/src/arg_matcher.rs
Normal file
@@ -0,0 +1,118 @@
|
||||
#![allow(clippy::needless_lifetimes)]
|
||||
|
||||
use crate::arg_type::ArgType;
|
||||
use crate::starlark::values::ValueLike;
|
||||
use allocative::Allocative;
|
||||
use derive_more::derive::Display;
|
||||
use starlark::any::ProvidesStaticType;
|
||||
use starlark::values::AllocValue;
|
||||
use starlark::values::Heap;
|
||||
use starlark::values::NoSerialize;
|
||||
use starlark::values::StarlarkValue;
|
||||
use starlark::values::UnpackValue;
|
||||
use starlark::values::Value;
|
||||
use starlark::values::starlark_value;
|
||||
use starlark::values::string::StarlarkStr;
|
||||
|
||||
/// Patterns that lists of arguments should be compared against.
|
||||
#[derive(Clone, Debug, Display, Eq, PartialEq, NoSerialize, ProvidesStaticType, Allocative)]
|
||||
#[display("{}", self)]
|
||||
pub enum ArgMatcher {
|
||||
/// Literal string value.
|
||||
Literal(String),
|
||||
|
||||
/// We cannot say what type of value this should match, but it is *not* a file path.
|
||||
OpaqueNonFile,
|
||||
|
||||
/// Required readable file.
|
||||
ReadableFile,
|
||||
|
||||
/// Required writeable file.
|
||||
WriteableFile,
|
||||
|
||||
/// Non-empty list of readable files.
|
||||
ReadableFiles,
|
||||
|
||||
/// Non-empty list of readable files, or empty list, implying readable cwd.
|
||||
ReadableFilesOrCwd,
|
||||
|
||||
/// Positive integer, like one that is required for `head -n`.
|
||||
PositiveInteger,
|
||||
|
||||
/// Bespoke matcher for safe sed commands.
|
||||
SedCommand,
|
||||
|
||||
/// Matches an arbitrary number of arguments without attributing any
|
||||
/// particular meaning to them. Caller is responsible for interpreting them.
|
||||
UnverifiedVarargs,
|
||||
}
|
||||
|
||||
impl ArgMatcher {
|
||||
pub fn cardinality(&self) -> ArgMatcherCardinality {
|
||||
match self {
|
||||
ArgMatcher::Literal(_)
|
||||
| ArgMatcher::OpaqueNonFile
|
||||
| ArgMatcher::ReadableFile
|
||||
| ArgMatcher::WriteableFile
|
||||
| ArgMatcher::PositiveInteger
|
||||
| ArgMatcher::SedCommand => ArgMatcherCardinality::One,
|
||||
ArgMatcher::ReadableFiles => ArgMatcherCardinality::AtLeastOne,
|
||||
ArgMatcher::ReadableFilesOrCwd | ArgMatcher::UnverifiedVarargs => {
|
||||
ArgMatcherCardinality::ZeroOrMore
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn arg_type(&self) -> ArgType {
|
||||
match self {
|
||||
ArgMatcher::Literal(value) => ArgType::Literal(value.clone()),
|
||||
ArgMatcher::OpaqueNonFile => ArgType::OpaqueNonFile,
|
||||
ArgMatcher::ReadableFile => ArgType::ReadableFile,
|
||||
ArgMatcher::WriteableFile => ArgType::WriteableFile,
|
||||
ArgMatcher::ReadableFiles => ArgType::ReadableFile,
|
||||
ArgMatcher::ReadableFilesOrCwd => ArgType::ReadableFile,
|
||||
ArgMatcher::PositiveInteger => ArgType::PositiveInteger,
|
||||
ArgMatcher::SedCommand => ArgType::SedCommand,
|
||||
ArgMatcher::UnverifiedVarargs => ArgType::Unknown,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// How many positional arguments a single `ArgMatcher` pattern consumes.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ArgMatcherCardinality {
    /// Exactly one argument.
    One,
    /// One or more arguments.
    AtLeastOne,
    /// Any number of arguments, including none.
    ZeroOrMore,
}

impl ArgMatcherCardinality {
    /// Returns `Some(n)` when the pattern always consumes exactly `n`
    /// arguments, or `None` when the number of arguments is variable.
    pub fn is_exact(&self) -> Option<usize> {
        match self {
            ArgMatcherCardinality::One => Some(1),
            ArgMatcherCardinality::AtLeastOne | ArgMatcherCardinality::ZeroOrMore => None,
        }
    }
}
|
||||
|
||||
/// Allows an `ArgMatcher` to be allocated on a Starlark `Heap`.
impl<'v> AllocValue<'v> for ArgMatcher {
|
||||
/// Consumes `self` and returns the `Value` produced by `Heap::alloc_simple`.
fn alloc_value(self, heap: &'v Heap) -> Value<'v> {
|
||||
heap.alloc_simple(self)
|
||||
}
|
||||
}
|
||||
|
||||
// Registers `ArgMatcher` as a Starlark value; the attribute names its
// Starlark-visible type "ArgMatcher". No trait methods are overridden here.
#[starlark_value(type = "ArgMatcher")]
|
||||
impl<'v> StarlarkValue<'v> for ArgMatcher {
|
||||
type Canonical = ArgMatcher;
|
||||
}
|
||||
|
||||
impl<'v> UnpackValue<'v> for ArgMatcher {
|
||||
type Error = starlark::Error;
|
||||
|
||||
fn unpack_value_impl(value: Value<'v>) -> starlark::Result<Option<Self>> {
|
||||
if let Some(str) = value.downcast_ref::<StarlarkStr>() {
|
||||
Ok(Some(ArgMatcher::Literal(str.as_str().to_string())))
|
||||
} else {
|
||||
Ok(value.downcast_ref::<ArgMatcher>().cloned())
|
||||
}
|
||||
}
|
||||
}
|
||||
204
llmx-rs/execpolicy/src/arg_resolver.rs
Normal file
204
llmx-rs/execpolicy/src/arg_resolver.rs
Normal file
@@ -0,0 +1,204 @@
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::arg_matcher::ArgMatcher;
|
||||
use crate::arg_matcher::ArgMatcherCardinality;
|
||||
use crate::error::Error;
|
||||
use crate::error::Result;
|
||||
use crate::valid_exec::MatchedArg;
|
||||
|
||||
/// A positional argument of a command invocation: its text plus an index.
#[derive(Clone, Debug, Eq, PartialEq, Serialize)]
|
||||
pub struct PositionalArg {
|
||||
/// Index recorded for this argument; it is passed through unchanged to
/// `MatchedArg::new` when the argument is matched.
/// NOTE(review): presumably the position in the original argument list —
/// confirm against the code that constructs these values.
pub index: usize,
|
||||
/// The argument text.
pub value: String,
|
||||
}
|
||||
|
||||
pub fn resolve_observed_args_with_patterns(
|
||||
program: &str,
|
||||
args: Vec<PositionalArg>,
|
||||
arg_patterns: &Vec<ArgMatcher>,
|
||||
) -> Result<Vec<MatchedArg>> {
|
||||
// Naive matching implementation. Among `arg_patterns`, there is allowed to
|
||||
// be at most one vararg pattern. Assuming `arg_patterns` is non-empty, we
|
||||
// end up with either:
|
||||
//
|
||||
// - all `arg_patterns` in `prefix_patterns`
|
||||
// - `arg_patterns` split across `prefix_patterns` (which could be empty),
|
||||
// one `vararg_pattern`, and `suffix_patterns` (which could also empty).
|
||||
//
|
||||
// From there, we start by matching everything in `prefix_patterns`.
|
||||
// Then we calculate how many positional args should be matched by
|
||||
// `suffix_patterns` and use that to determine how many args are left to
|
||||
// be matched by `vararg_pattern` (which could be zero).
|
||||
//
|
||||
// After associating positional args with `vararg_pattern`, we match the
|
||||
// `suffix_patterns` with the remaining args.
|
||||
let ParitionedArgs {
|
||||
num_prefix_args,
|
||||
num_suffix_args,
|
||||
prefix_patterns,
|
||||
suffix_patterns,
|
||||
vararg_pattern,
|
||||
} = partition_args(program, arg_patterns)?;
|
||||
|
||||
let mut matched_args = Vec::<MatchedArg>::new();
|
||||
|
||||
let prefix = get_range_checked(&args, 0..num_prefix_args)?;
|
||||
let mut prefix_arg_index = 0;
|
||||
for pattern in prefix_patterns {
|
||||
let n = pattern
|
||||
.cardinality()
|
||||
.is_exact()
|
||||
.ok_or(Error::InternalInvariantViolation {
|
||||
message: "expected exact cardinality".to_string(),
|
||||
})?;
|
||||
for positional_arg in &prefix[prefix_arg_index..prefix_arg_index + n] {
|
||||
let matched_arg = MatchedArg::new(
|
||||
positional_arg.index,
|
||||
pattern.arg_type(),
|
||||
&positional_arg.value.clone(),
|
||||
)?;
|
||||
matched_args.push(matched_arg);
|
||||
}
|
||||
prefix_arg_index += n;
|
||||
}
|
||||
|
||||
if num_suffix_args > args.len() {
|
||||
return Err(Error::NotEnoughArgs {
|
||||
program: program.to_string(),
|
||||
args,
|
||||
arg_patterns: arg_patterns.clone(),
|
||||
});
|
||||
}
|
||||
|
||||
let initial_suffix_args_index = args.len() - num_suffix_args;
|
||||
if prefix_arg_index > initial_suffix_args_index {
|
||||
return Err(Error::PrefixOverlapsSuffix {});
|
||||
}
|
||||
|
||||
if let Some(pattern) = vararg_pattern {
|
||||
let vararg = get_range_checked(&args, prefix_arg_index..initial_suffix_args_index)?;
|
||||
match pattern.cardinality() {
|
||||
ArgMatcherCardinality::One => {
|
||||
return Err(Error::InternalInvariantViolation {
|
||||
message: "vararg pattern should not have cardinality of one".to_string(),
|
||||
});
|
||||
}
|
||||
ArgMatcherCardinality::AtLeastOne => {
|
||||
if vararg.is_empty() {
|
||||
return Err(Error::VarargMatcherDidNotMatchAnything {
|
||||
program: program.to_string(),
|
||||
matcher: pattern,
|
||||
});
|
||||
} else {
|
||||
for positional_arg in vararg {
|
||||
let matched_arg = MatchedArg::new(
|
||||
positional_arg.index,
|
||||
pattern.arg_type(),
|
||||
&positional_arg.value.clone(),
|
||||
)?;
|
||||
matched_args.push(matched_arg);
|
||||
}
|
||||
}
|
||||
}
|
||||
ArgMatcherCardinality::ZeroOrMore => {
|
||||
for positional_arg in vararg {
|
||||
let matched_arg = MatchedArg::new(
|
||||
positional_arg.index,
|
||||
pattern.arg_type(),
|
||||
&positional_arg.value.clone(),
|
||||
)?;
|
||||
matched_args.push(matched_arg);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let suffix = get_range_checked(&args, initial_suffix_args_index..args.len())?;
|
||||
let mut suffix_arg_index = 0;
|
||||
for pattern in suffix_patterns {
|
||||
let n = pattern
|
||||
.cardinality()
|
||||
.is_exact()
|
||||
.ok_or(Error::InternalInvariantViolation {
|
||||
message: "expected exact cardinality".to_string(),
|
||||
})?;
|
||||
for positional_arg in &suffix[suffix_arg_index..suffix_arg_index + n] {
|
||||
let matched_arg = MatchedArg::new(
|
||||
positional_arg.index,
|
||||
pattern.arg_type(),
|
||||
&positional_arg.value.clone(),
|
||||
)?;
|
||||
matched_args.push(matched_arg);
|
||||
}
|
||||
suffix_arg_index += n;
|
||||
}
|
||||
|
||||
if matched_args.len() < args.len() {
|
||||
let extra_args = get_range_checked(&args, matched_args.len()..args.len())?;
|
||||
Err(Error::UnexpectedArguments {
|
||||
program: program.to_string(),
|
||||
args: extra_args.to_vec(),
|
||||
})
|
||||
} else {
|
||||
Ok(matched_args)
|
||||
}
|
||||
}
|
||||
|
||||
/// Result of `partition_args`: the argument patterns split around the (at
/// most one) variable-length pattern.
#[derive(Default)]
|
||||
struct ParitionedArgs {
|
||||
/// Total number of arguments consumed by `prefix_patterns`.
num_prefix_args: usize,
|
||||
/// Total number of arguments consumed by `suffix_patterns`.
num_suffix_args: usize,
|
||||
/// Fixed-size patterns that appear before the variable-length pattern.
prefix_patterns: Vec<ArgMatcher>,
|
||||
/// Fixed-size patterns that appear after the variable-length pattern.
suffix_patterns: Vec<ArgMatcher>,
|
||||
/// The variable-length pattern, if the pattern list contains one.
vararg_pattern: Option<ArgMatcher>,
|
||||
}
|
||||
|
||||
fn partition_args(program: &str, arg_patterns: &Vec<ArgMatcher>) -> Result<ParitionedArgs> {
|
||||
let mut in_prefix = true;
|
||||
let mut partitioned_args = ParitionedArgs::default();
|
||||
|
||||
for pattern in arg_patterns {
|
||||
match pattern.cardinality().is_exact() {
|
||||
Some(n) => {
|
||||
if in_prefix {
|
||||
partitioned_args.prefix_patterns.push(pattern.clone());
|
||||
partitioned_args.num_prefix_args += n;
|
||||
} else {
|
||||
partitioned_args.suffix_patterns.push(pattern.clone());
|
||||
partitioned_args.num_suffix_args += n;
|
||||
}
|
||||
}
|
||||
None => match partitioned_args.vararg_pattern {
|
||||
None => {
|
||||
partitioned_args.vararg_pattern = Some(pattern.clone());
|
||||
in_prefix = false;
|
||||
}
|
||||
Some(existing_pattern) => {
|
||||
return Err(Error::MultipleVarargPatterns {
|
||||
program: program.to_string(),
|
||||
first: existing_pattern,
|
||||
second: pattern.clone(),
|
||||
});
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
Ok(partitioned_args)
|
||||
}
|
||||
|
||||
fn get_range_checked<T>(vec: &[T], range: std::ops::Range<usize>) -> Result<&[T]> {
|
||||
if range.start > range.end {
|
||||
Err(Error::RangeStartExceedsEnd {
|
||||
start: range.start,
|
||||
end: range.end,
|
||||
})
|
||||
} else if range.end > vec.len() {
|
||||
Err(Error::RangeEndOutOfBounds {
|
||||
end: range.end,
|
||||
len: vec.len(),
|
||||
})
|
||||
} else {
|
||||
Ok(&vec[range])
|
||||
}
|
||||
}
|
||||
87
llmx-rs/execpolicy/src/arg_type.rs
Normal file
87
llmx-rs/execpolicy/src/arg_type.rs
Normal file
@@ -0,0 +1,87 @@
|
||||
#![allow(clippy::needless_lifetimes)]
|
||||
|
||||
use crate::error::Error;
|
||||
use crate::error::Result;
|
||||
use crate::sed_command::parse_sed_command;
|
||||
use allocative::Allocative;
|
||||
use derive_more::derive::Display;
|
||||
use serde::Serialize;
|
||||
use starlark::any::ProvidesStaticType;
|
||||
use starlark::values::StarlarkValue;
|
||||
use starlark::values::starlark_value;
|
||||
|
||||
#[derive(Debug, Clone, Display, Eq, PartialEq, ProvidesStaticType, Allocative, Serialize)]
|
||||
#[display("{}", self)]
|
||||
pub enum ArgType {
|
||||
Literal(String),
|
||||
/// We cannot say what this argument represents, but it is *not* a file path.
|
||||
OpaqueNonFile,
|
||||
/// A file (or directory) that can be expected to be read as part of this command.
|
||||
ReadableFile,
|
||||
/// A file (or directory) that can be expected to be written as part of this command.
|
||||
WriteableFile,
|
||||
/// Positive integer, like one that is required for `head -n`.
|
||||
PositiveInteger,
|
||||
/// Bespoke arg type for a safe sed command.
|
||||
SedCommand,
|
||||
/// Type is unknown: it may or may not be a file.
|
||||
Unknown,
|
||||
}
|
||||
|
||||
impl ArgType {
|
||||
pub fn validate(&self, value: &str) -> Result<()> {
|
||||
match self {
|
||||
ArgType::Literal(literal_value) => {
|
||||
if value != *literal_value {
|
||||
Err(Error::LiteralValueDidNotMatch {
|
||||
expected: literal_value.clone(),
|
||||
actual: value.to_string(),
|
||||
})
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
ArgType::ReadableFile => {
|
||||
if value.is_empty() {
|
||||
Err(Error::EmptyFileName {})
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
ArgType::WriteableFile => {
|
||||
if value.is_empty() {
|
||||
Err(Error::EmptyFileName {})
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
ArgType::OpaqueNonFile | ArgType::Unknown => Ok(()),
|
||||
ArgType::PositiveInteger => match value.parse::<u64>() {
|
||||
Ok(0) => Err(Error::InvalidPositiveInteger {
|
||||
value: value.to_string(),
|
||||
}),
|
||||
Ok(_) => Ok(()),
|
||||
Err(_) => Err(Error::InvalidPositiveInteger {
|
||||
value: value.to_string(),
|
||||
}),
|
||||
},
|
||||
ArgType::SedCommand => parse_sed_command(value),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn might_write_file(&self) -> bool {
|
||||
match self {
|
||||
ArgType::WriteableFile | ArgType::Unknown => true,
|
||||
ArgType::Literal(_)
|
||||
| ArgType::OpaqueNonFile
|
||||
| ArgType::PositiveInteger
|
||||
| ArgType::ReadableFile
|
||||
| ArgType::SedCommand => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Registers `ArgType` as a Starlark value; the attribute names its
// Starlark-visible type "ArgType". No trait methods are overridden here.
#[starlark_value(type = "ArgType")]
|
||||
impl<'v> StarlarkValue<'v> for ArgType {
|
||||
type Canonical = ArgType;
|
||||
}
|
||||
202
llmx-rs/execpolicy/src/default.policy
Normal file
202
llmx-rs/execpolicy/src/default.policy
Normal file
@@ -0,0 +1,202 @@
|
||||
"""
|
||||
define_program() supports the following arguments:
|
||||
- program: the name of the program
|
||||
- system_path: list of absolute paths on the system where program can likely be found
|
||||
- option_bundling (PLANNED): whether to allow bundling of options (e.g. `-al` for `-a -l`)
|
||||
- combine_format (PLANNED): whether to allow `--option=value` (as opposed to `--option value`)
|
||||
- options: the command-line flags/options: use flag() and opt() to define these
|
||||
- args: the rules for what arguments are allowed that are not "options"
|
||||
- should_match: list of command-line invocations that should be matched by the rule
|
||||
- should_not_match: list of command-line invocations that should not be matched by the rule
|
||||
"""
|
||||
|
||||
define_program(
|
||||
program="ls",
|
||||
system_path=["/bin/ls", "/usr/bin/ls"],
|
||||
options=[
|
||||
flag("-1"),
|
||||
flag("-a"),
|
||||
flag("-l"),
|
||||
],
|
||||
args=[ARG_RFILES_OR_CWD],
|
||||
)
|
||||
|
||||
define_program(
|
||||
program="cat",
|
||||
options=[
|
||||
flag("-b"),
|
||||
flag("-n"),
|
||||
flag("-t"),
|
||||
],
|
||||
system_path=["/bin/cat", "/usr/bin/cat"],
|
||||
args=[ARG_RFILES],
|
||||
should_match=[
|
||||
["file.txt"],
|
||||
["-n", "file.txt"],
|
||||
["-b", "file.txt"],
|
||||
],
|
||||
should_not_match=[
|
||||
# While cat without args is valid, it will read from stdin, which
|
||||
# does not seem appropriate for our current use case.
|
||||
[],
|
||||
# Let's not auto-approve advisory locking.
|
||||
["-l", "file.txt"],
|
||||
]
|
||||
)
|
||||
|
||||
define_program(
|
||||
program="cp",
|
||||
options=[
|
||||
flag("-r"),
|
||||
flag("-R"),
|
||||
flag("--recursive"),
|
||||
],
|
||||
args=[ARG_RFILES, ARG_WFILE],
|
||||
system_path=["/bin/cp", "/usr/bin/cp"],
|
||||
should_match=[
|
||||
["foo", "bar"],
|
||||
],
|
||||
should_not_match=[
|
||||
["foo"],
|
||||
],
|
||||
)
|
||||
|
||||
define_program(
|
||||
program="head",
|
||||
system_path=["/bin/head", "/usr/bin/head"],
|
||||
options=[
|
||||
opt("-c", ARG_POS_INT),
|
||||
opt("-n", ARG_POS_INT),
|
||||
],
|
||||
args=[ARG_RFILES],
|
||||
)
|
||||
|
||||
printenv_system_path = ["/usr/bin/printenv"]
|
||||
|
||||
# Print all environment variables.
|
||||
define_program(
|
||||
program="printenv",
|
||||
args=[],
|
||||
system_path=printenv_system_path,
|
||||
# This variant of `printenv` only allows zero args.
|
||||
should_match=[[]],
|
||||
should_not_match=[["PATH"]],
|
||||
)
|
||||
|
||||
# Print a specific environment variable.
|
||||
define_program(
|
||||
program="printenv",
|
||||
args=[ARG_OPAQUE_VALUE],
|
||||
system_path=printenv_system_path,
|
||||
# This variant of `printenv` only allows exactly one arg.
|
||||
should_match=[["PATH"]],
|
||||
should_not_match=[[], ["PATH", "HOME"]],
|
||||
)
|
||||
|
||||
# Note that `pwd` is generally implemented as a shell built-in. It does not
|
||||
# accept any arguments.
|
||||
define_program(
|
||||
program="pwd",
|
||||
options=[
|
||||
flag("-L"),
|
||||
flag("-P"),
|
||||
],
|
||||
args=[],
|
||||
)
|
||||
|
||||
define_program(
|
||||
program="rg",
|
||||
options=[
|
||||
opt("-A", ARG_POS_INT),
|
||||
opt("-B", ARG_POS_INT),
|
||||
opt("-C", ARG_POS_INT),
|
||||
opt("-d", ARG_POS_INT),
|
||||
opt("--max-depth", ARG_POS_INT),
|
||||
opt("-g", ARG_OPAQUE_VALUE),
|
||||
opt("--glob", ARG_OPAQUE_VALUE),
|
||||
opt("-m", ARG_POS_INT),
|
||||
opt("--max-count", ARG_POS_INT),
|
||||
|
||||
flag("-n"),
|
||||
flag("-i"),
|
||||
flag("-l"),
|
||||
flag("--files"),
|
||||
flag("--files-with-matches"),
|
||||
flag("--files-without-match"),
|
||||
],
|
||||
args=[ARG_OPAQUE_VALUE, ARG_RFILES_OR_CWD],
|
||||
should_match=[
|
||||
["-n", "init"],
|
||||
["-n", "init", "."],
|
||||
["-i", "-n", "init", "src"],
|
||||
["--files", "--max-depth", "2", "."],
|
||||
],
|
||||
should_not_match=[
|
||||
["-m", "-n", "init"],
|
||||
["--glob", "src"],
|
||||
],
|
||||
# TODO(mbolin): Perhaps we need a way to indicate that we expect `rg` to be
|
||||
# bundled with the host environment and we should be using that version.
|
||||
system_path=[],
|
||||
)
|
||||
|
||||
# Unfortunately, `sed` is difficult to secure because GNU sed supports an `e`
|
||||
# flag where `s/pattern/replacement/e` would run `replacement` as a shell
|
||||
# command every time `pattern` is matched. For example, try the following on
|
||||
# Ubuntu (which uses GNU sed, unlike macOS):
|
||||
#
|
||||
# ```shell
|
||||
# $ yes | head -n 4 > /tmp/yes.txt
|
||||
# $ sed 's/y/echo hi/e' /tmp/yes.txt
|
||||
# hi
|
||||
# hi
|
||||
# hi
|
||||
# hi
|
||||
# ```
|
||||
#
|
||||
# As you can see, `echo hi` got executed four times. In order to support some
|
||||
# basic sed functionality, we implement a bespoke `ARG_SED_COMMAND` that matches
|
||||
# only "known safe" sed commands.
|
||||
common_sed_flags = [
|
||||
# We deliberately do not support -i or -f.
|
||||
flag("-n"),
|
||||
flag("-u"),
|
||||
]
|
||||
sed_system_path = ["/usr/bin/sed"]
|
||||
|
||||
# When -e is not specified, the first argument must be a valid sed command.
|
||||
define_program(
|
||||
program="sed",
|
||||
options=common_sed_flags,
|
||||
args=[ARG_SED_COMMAND, ARG_RFILES],
|
||||
system_path=sed_system_path,
|
||||
)
|
||||
|
||||
# When -e is required, all arguments are assumed to be readable files.
|
||||
define_program(
|
||||
program="sed",
|
||||
options=common_sed_flags + [
|
||||
opt("-e", ARG_SED_COMMAND, required=True),
|
||||
],
|
||||
args=[ARG_RFILES],
|
||||
system_path=sed_system_path,
|
||||
)
|
||||
|
||||
define_program(
|
||||
program="which",
|
||||
options=[
|
||||
flag("-a"),
|
||||
flag("-s"),
|
||||
],
|
||||
# Surprisingly, `which` takes more than one argument.
|
||||
args=[ARG_RFILES],
|
||||
should_match=[
|
||||
["python3"],
|
||||
["-a", "python3"],
|
||||
["-a", "python3", "cargo"],
|
||||
],
|
||||
should_not_match=[
|
||||
[],
|
||||
],
|
||||
system_path=["/bin/which", "/usr/bin/which"],
|
||||
)
|
||||
96
llmx-rs/execpolicy/src/error.rs
Normal file
96
llmx-rs/execpolicy/src/error.rs
Normal file
@@ -0,0 +1,96 @@
|
||||
use std::path::PathBuf;
|
||||
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::arg_matcher::ArgMatcher;
|
||||
use crate::arg_resolver::PositionalArg;
|
||||
use serde_with::DisplayFromStr;
|
||||
use serde_with::serde_as;
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Debug, Eq, PartialEq, Serialize)]
|
||||
#[serde(tag = "type")]
|
||||
pub enum Error {
|
||||
NoSpecForProgram {
|
||||
program: String,
|
||||
},
|
||||
OptionMissingValue {
|
||||
program: String,
|
||||
option: String,
|
||||
},
|
||||
OptionFollowedByOptionInsteadOfValue {
|
||||
program: String,
|
||||
option: String,
|
||||
value: String,
|
||||
},
|
||||
UnknownOption {
|
||||
program: String,
|
||||
option: String,
|
||||
},
|
||||
UnexpectedArguments {
|
||||
program: String,
|
||||
args: Vec<PositionalArg>,
|
||||
},
|
||||
DoubleDashNotSupportedYet {
|
||||
program: String,
|
||||
},
|
||||
MultipleVarargPatterns {
|
||||
program: String,
|
||||
first: ArgMatcher,
|
||||
second: ArgMatcher,
|
||||
},
|
||||
RangeStartExceedsEnd {
|
||||
start: usize,
|
||||
end: usize,
|
||||
},
|
||||
RangeEndOutOfBounds {
|
||||
end: usize,
|
||||
len: usize,
|
||||
},
|
||||
PrefixOverlapsSuffix {},
|
||||
NotEnoughArgs {
|
||||
program: String,
|
||||
args: Vec<PositionalArg>,
|
||||
arg_patterns: Vec<ArgMatcher>,
|
||||
},
|
||||
InternalInvariantViolation {
|
||||
message: String,
|
||||
},
|
||||
VarargMatcherDidNotMatchAnything {
|
||||
program: String,
|
||||
matcher: ArgMatcher,
|
||||
},
|
||||
EmptyFileName {},
|
||||
LiteralValueDidNotMatch {
|
||||
expected: String,
|
||||
actual: String,
|
||||
},
|
||||
InvalidPositiveInteger {
|
||||
value: String,
|
||||
},
|
||||
MissingRequiredOptions {
|
||||
program: String,
|
||||
options: Vec<String>,
|
||||
},
|
||||
SedCommandNotProvablySafe {
|
||||
command: String,
|
||||
},
|
||||
ReadablePathNotInReadableFolders {
|
||||
file: PathBuf,
|
||||
folders: Vec<PathBuf>,
|
||||
},
|
||||
WriteablePathNotInWriteableFolders {
|
||||
file: PathBuf,
|
||||
folders: Vec<PathBuf>,
|
||||
},
|
||||
CannotCheckRelativePath {
|
||||
file: PathBuf,
|
||||
},
|
||||
CannotCanonicalizePath {
|
||||
file: String,
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
error: std::io::ErrorKind,
|
||||
},
|
||||
}
|
||||
28
llmx-rs/execpolicy/src/exec_call.rs
Normal file
28
llmx-rs/execpolicy/src/exec_call.rs
Normal file
@@ -0,0 +1,28 @@
|
||||
use std::fmt::Display;
|
||||
|
||||
use serde::Serialize;
|
||||
|
||||
/// A command invocation: a program name and the arguments passed to it.
#[derive(Clone, Debug, Eq, PartialEq, Serialize)]
|
||||
pub struct ExecCall {
|
||||
/// Name of the program being invoked.
pub program: String,
|
||||
/// Arguments passed to the program, in order.
pub args: Vec<String>,
|
||||
}
|
||||
|
||||
impl ExecCall {
|
||||
pub fn new(program: &str, args: &[&str]) -> Self {
|
||||
Self {
|
||||
program: program.to_string(),
|
||||
args: args.iter().map(|&s| s.into()).collect(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for ExecCall {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", self.program)?;
|
||||
for arg in &self.args {
|
||||
write!(f, " {arg}")?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
294
llmx-rs/execpolicy/src/execv_checker.rs
Normal file
294
llmx-rs/execpolicy/src/execv_checker.rs
Normal file
@@ -0,0 +1,294 @@
|
||||
use std::borrow::Cow;
|
||||
use std::ffi::OsString;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use crate::ArgType;
|
||||
use crate::Error::CannotCanonicalizePath;
|
||||
use crate::Error::CannotCheckRelativePath;
|
||||
use crate::Error::ReadablePathNotInReadableFolders;
|
||||
use crate::Error::WriteablePathNotInWriteableFolders;
|
||||
use crate::ExecCall;
|
||||
use crate::MatchedExec;
|
||||
use crate::Policy;
|
||||
use crate::Result;
|
||||
use crate::ValidExec;
|
||||
use path_absolutize::*;
|
||||
|
||||
/// Verifies that `$file` lies under at least one entry of `$folders`.
///
/// When no folder is a prefix of `$file` (per `starts_with`), the macro
/// expands to a `return Err($error { file, folders })`, so it makes the
/// *enclosing function* return early. It can therefore only be used inside a
/// function whose return type accepts that error value.
macro_rules! check_file_in_folders {
|
||||
($file:expr, $folders:expr, $error:ident) => {
|
||||
if !$folders.iter().any(|folder| $file.starts_with(folder)) {
|
||||
return Err($error {
|
||||
file: $file.clone(),
|
||||
folders: $folders.to_vec(),
|
||||
});
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/// Checks command invocations against a `Policy`.
pub struct ExecvChecker {
|
||||
/// The policy consulted when matching an `ExecCall`.
execv_policy: Policy,
|
||||
}
|
||||
|
||||
impl ExecvChecker {
|
||||
pub fn new(execv_policy: Policy) -> Self {
|
||||
Self { execv_policy }
|
||||
}
|
||||
|
||||
pub fn r#match(&self, exec_call: &ExecCall) -> Result<MatchedExec> {
|
||||
self.execv_policy.check(exec_call)
|
||||
}
|
||||
|
||||
/// The caller is responsible for ensuring readable_folders and
|
||||
/// writeable_folders are in canonical form.
|
||||
pub fn check(
|
||||
&self,
|
||||
valid_exec: ValidExec,
|
||||
cwd: &Option<OsString>,
|
||||
readable_folders: &[PathBuf],
|
||||
writeable_folders: &[PathBuf],
|
||||
) -> Result<String> {
|
||||
for (arg_type, value) in valid_exec
|
||||
.args
|
||||
.into_iter()
|
||||
.map(|arg| (arg.r#type, arg.value))
|
||||
.chain(
|
||||
valid_exec
|
||||
.opts
|
||||
.into_iter()
|
||||
.map(|opt| (opt.r#type, opt.value)),
|
||||
)
|
||||
{
|
||||
match arg_type {
|
||||
ArgType::ReadableFile => {
|
||||
let readable_file = ensure_absolute_path(&value, cwd)?;
|
||||
check_file_in_folders!(
|
||||
readable_file,
|
||||
readable_folders,
|
||||
ReadablePathNotInReadableFolders
|
||||
);
|
||||
}
|
||||
ArgType::WriteableFile => {
|
||||
let writeable_file = ensure_absolute_path(&value, cwd)?;
|
||||
check_file_in_folders!(
|
||||
writeable_file,
|
||||
writeable_folders,
|
||||
WriteablePathNotInWriteableFolders
|
||||
);
|
||||
}
|
||||
ArgType::OpaqueNonFile
|
||||
| ArgType::Unknown
|
||||
| ArgType::PositiveInteger
|
||||
| ArgType::SedCommand
|
||||
| ArgType::Literal(_) => {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut program = valid_exec.program.to_string();
|
||||
for system_path in valid_exec.system_path {
|
||||
if is_executable_file(&system_path) {
|
||||
program = system_path;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(program)
|
||||
}
|
||||
}
|
||||
|
||||
fn ensure_absolute_path(path: &str, cwd: &Option<OsString>) -> Result<PathBuf> {
|
||||
let file = PathBuf::from(path);
|
||||
let result = if file.is_relative() {
|
||||
match cwd {
|
||||
Some(cwd) => file.absolutize_from(cwd),
|
||||
None => return Err(CannotCheckRelativePath { file }),
|
||||
}
|
||||
} else {
|
||||
file.absolutize()
|
||||
};
|
||||
result
|
||||
.map(Cow::into_owned)
|
||||
.map_err(|error| CannotCanonicalizePath {
|
||||
file: path.to_string(),
|
||||
error: error.kind(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Reports whether `path` names an existing regular file that can be executed.
///
/// Returns `false` when the file's metadata cannot be read (for example, the
/// path does not exist) or when the path is not a regular file.
///
/// - On Unix, the file must have at least one execute permission bit set
///   (owner, group, or other).
/// - On Windows, any regular file is accepted.
/// - On every other target the function returns `false`.
fn is_executable_file(path: &str) -> bool {
    #[cfg(unix)]
    fn has_execute_permission(metadata: &std::fs::Metadata) -> bool {
        use std::os::unix::fs::PermissionsExt;

        // 0o111 covers the execute bit for owner, group and other.
        metadata.permissions().mode() & 0o111 != 0
    }

    #[cfg(windows)]
    fn has_execute_permission(_metadata: &std::fs::Metadata) -> bool {
        // TODO(mbolin): Check against PATHEXT environment variable.
        true
    }

    #[cfg(not(any(unix, windows)))]
    fn has_execute_permission(_metadata: &std::fs::Metadata) -> bool {
        false
    }

    match std::fs::metadata(Path::new(path)) {
        Ok(metadata) => metadata.is_file() && has_execute_permission(&metadata),
        Err(_) => false,
    }
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use tempfile::TempDir;
|
||||
|
||||
use super::*;
|
||||
use crate::MatchedArg;
|
||||
use crate::PolicyParser;
|
||||
use anyhow::Result;
|
||||
use anyhow::anyhow;
|
||||
|
||||
fn setup(fake_cp: &Path) -> ExecvChecker {
|
||||
let source = format!(
|
||||
r#"
|
||||
define_program(
|
||||
program="cp",
|
||||
args=[ARG_RFILE, ARG_WFILE],
|
||||
system_path=[{fake_cp:?}]
|
||||
)
|
||||
"#
|
||||
);
|
||||
let parser = PolicyParser::new("#test", &source);
|
||||
let policy = parser.parse().unwrap();
|
||||
ExecvChecker::new(policy)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_check_valid_input_files() -> Result<()> {
|
||||
let temp_dir = TempDir::new()?;
|
||||
|
||||
// Create an executable file that can be used with the system_path arg.
|
||||
let fake_cp = temp_dir.path().join("cp");
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
|
||||
let fake_cp_file = std::fs::File::create(&fake_cp)?;
|
||||
let mut permissions = fake_cp_file.metadata()?.permissions();
|
||||
permissions.set_mode(0o755);
|
||||
std::fs::set_permissions(&fake_cp, permissions)?;
|
||||
}
|
||||
#[cfg(windows)]
|
||||
{
|
||||
std::fs::File::create(&fake_cp)?;
|
||||
}
|
||||
|
||||
// Create root_path and reference to files under the root.
|
||||
let root_path = temp_dir.path().to_path_buf();
|
||||
let source_path = root_path.join("source");
|
||||
let dest_path = root_path.join("dest");
|
||||
|
||||
let cp = fake_cp.to_str().unwrap().to_string();
|
||||
let root = root_path.to_str().unwrap().to_string();
|
||||
let source = source_path.to_str().unwrap().to_string();
|
||||
let dest = dest_path.to_str().unwrap().to_string();
|
||||
|
||||
let cwd = Some(root_path.clone().into());
|
||||
|
||||
let checker = setup(&fake_cp);
|
||||
let exec_call = ExecCall {
|
||||
program: "cp".into(),
|
||||
args: vec![source, dest.clone()],
|
||||
};
|
||||
let valid_exec = match checker.r#match(&exec_call).map_err(|e| anyhow!("{e:?}"))? {
|
||||
MatchedExec::Match { exec } => exec,
|
||||
unexpected => panic!("Expected a safe exec but got {unexpected:?}"),
|
||||
};
|
||||
|
||||
// No readable or writeable folders specified.
|
||||
assert_eq!(
|
||||
checker.check(valid_exec.clone(), &cwd, &[], &[]),
|
||||
Err(ReadablePathNotInReadableFolders {
|
||||
file: source_path,
|
||||
folders: vec![]
|
||||
}),
|
||||
);
|
||||
|
||||
// Only readable folders specified.
|
||||
assert_eq!(
|
||||
checker.check(
|
||||
valid_exec.clone(),
|
||||
&cwd,
|
||||
std::slice::from_ref(&root_path),
|
||||
&[]
|
||||
),
|
||||
Err(WriteablePathNotInWriteableFolders {
|
||||
file: dest_path.clone(),
|
||||
folders: vec![]
|
||||
}),
|
||||
);
|
||||
|
||||
// Both readable and writeable folders specified.
|
||||
assert_eq!(
|
||||
checker.check(
|
||||
valid_exec,
|
||||
&cwd,
|
||||
std::slice::from_ref(&root_path),
|
||||
std::slice::from_ref(&root_path)
|
||||
),
|
||||
Ok(cp.clone()),
|
||||
);
|
||||
|
||||
// Args are the readable and writeable folders, not files within the
|
||||
// folders.
|
||||
let exec_call_folders_as_args = ExecCall {
|
||||
program: "cp".into(),
|
||||
args: vec![root.clone(), root],
|
||||
};
|
||||
let valid_exec_call_folders_as_args = match checker
|
||||
.r#match(&exec_call_folders_as_args)
|
||||
.map_err(|e| anyhow!("{e:?}"))?
|
||||
{
|
||||
MatchedExec::Match { exec } => exec,
|
||||
_ => panic!("Expected a safe exec"),
|
||||
};
|
||||
assert_eq!(
|
||||
checker.check(
|
||||
valid_exec_call_folders_as_args,
|
||||
&cwd,
|
||||
std::slice::from_ref(&root_path),
|
||||
std::slice::from_ref(&root_path)
|
||||
),
|
||||
Ok(cp),
|
||||
);
|
||||
|
||||
// Specify a parent of a readable folder as input.
|
||||
let exec_with_parent_of_readable_folder = ValidExec {
|
||||
program: "cp".into(),
|
||||
args: vec![
|
||||
MatchedArg::new(
|
||||
0,
|
||||
ArgType::ReadableFile,
|
||||
root_path.parent().unwrap().to_str().unwrap(),
|
||||
)
|
||||
.map_err(|e| anyhow!("{e:?}"))?,
|
||||
MatchedArg::new(1, ArgType::WriteableFile, &dest).map_err(|e| anyhow!("{e:?}"))?,
|
||||
],
|
||||
..Default::default()
|
||||
};
|
||||
assert_eq!(
|
||||
checker.check(
|
||||
exec_with_parent_of_readable_folder,
|
||||
&cwd,
|
||||
std::slice::from_ref(&root_path),
|
||||
std::slice::from_ref(&dest_path)
|
||||
),
|
||||
Err(ReadablePathNotInReadableFolders {
|
||||
file: root_path.parent().unwrap().to_path_buf(),
|
||||
folders: vec![root_path.clone()]
|
||||
}),
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
45
llmx-rs/execpolicy/src/lib.rs
Normal file
45
llmx-rs/execpolicy/src/lib.rs
Normal file
@@ -0,0 +1,45 @@
|
||||
#![allow(clippy::type_complexity)]
|
||||
#![allow(clippy::too_many_arguments)]
|
||||
#[macro_use]
|
||||
extern crate starlark;
|
||||
|
||||
mod arg_matcher;
|
||||
mod arg_resolver;
|
||||
mod arg_type;
|
||||
mod error;
|
||||
mod exec_call;
|
||||
mod execv_checker;
|
||||
mod opt;
|
||||
mod policy;
|
||||
mod policy_parser;
|
||||
mod program;
|
||||
mod sed_command;
|
||||
mod valid_exec;
|
||||
|
||||
pub use arg_matcher::ArgMatcher;
|
||||
pub use arg_resolver::PositionalArg;
|
||||
pub use arg_type::ArgType;
|
||||
pub use error::Error;
|
||||
pub use error::Result;
|
||||
pub use exec_call::ExecCall;
|
||||
pub use execv_checker::ExecvChecker;
|
||||
pub use opt::Opt;
|
||||
pub use policy::Policy;
|
||||
pub use policy_parser::PolicyParser;
|
||||
pub use program::Forbidden;
|
||||
pub use program::MatchedExec;
|
||||
pub use program::NegativeExamplePassedCheck;
|
||||
pub use program::PositiveExampleFailedCheck;
|
||||
pub use program::ProgramSpec;
|
||||
pub use sed_command::parse_sed_command;
|
||||
pub use valid_exec::MatchedArg;
|
||||
pub use valid_exec::MatchedFlag;
|
||||
pub use valid_exec::MatchedOpt;
|
||||
pub use valid_exec::ValidExec;
|
||||
|
||||
const DEFAULT_POLICY: &str = include_str!("default.policy");
|
||||
|
||||
pub fn get_default_policy() -> starlark::Result<Policy> {
|
||||
let parser = PolicyParser::new("#default", DEFAULT_POLICY);
|
||||
parser.parse()
|
||||
}
|
||||
167
llmx-rs/execpolicy/src/main.rs
Normal file
167
llmx-rs/execpolicy/src/main.rs
Normal file
@@ -0,0 +1,167 @@
|
||||
use anyhow::Result;
|
||||
use clap::Parser;
|
||||
use clap::Subcommand;
|
||||
use codex_execpolicy::ExecCall;
|
||||
use codex_execpolicy::MatchedExec;
|
||||
use codex_execpolicy::Policy;
|
||||
use codex_execpolicy::PolicyParser;
|
||||
use codex_execpolicy::ValidExec;
|
||||
use codex_execpolicy::get_default_policy;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
use serde::de;
|
||||
use starlark::Error as StarlarkError;
|
||||
use std::path::PathBuf;
|
||||
use std::str::FromStr;
|
||||
|
||||
// Process exit codes used by `check_command` when the caller asked for a
// non-zero exit on anything other than a verified-safe command.

/// The command matched a policy rule, but it might write files.
const MATCHED_BUT_WRITES_FILES_EXIT_CODE: i32 = 12;

/// The policy check returned an error, so safety could not be verified.
const MIGHT_BE_SAFE_EXIT_CODE: i32 = 13;

/// The policy explicitly forbids the command.
const FORBIDDEN_EXIT_CODE: i32 = 14;
|
||||
|
||||
#[derive(Parser, Deserialize, Debug)]
|
||||
#[command(version, about, long_about = None)]
|
||||
pub struct Args {
|
||||
/// If the command fails the policy, exit with 13, but print parseable JSON
|
||||
/// to stdout.
|
||||
#[clap(long)]
|
||||
pub require_safe: bool,
|
||||
|
||||
/// Path to the policy file.
|
||||
#[clap(long, short = 'p')]
|
||||
pub policy: Option<PathBuf>,
|
||||
|
||||
#[command(subcommand)]
|
||||
pub command: Command,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize, Subcommand)]
|
||||
pub enum Command {
|
||||
/// Checks the command as if the arguments were the inputs to execv(3).
|
||||
Check {
|
||||
#[arg(trailing_var_arg = true)]
|
||||
command: Vec<String>,
|
||||
},
|
||||
|
||||
/// Checks the command encoded as a JSON object.
|
||||
#[clap(name = "check-json")]
|
||||
CheckJson {
|
||||
/// JSON object with "program" (str) and "args" (list[str]) fields.
|
||||
#[serde(deserialize_with = "deserialize_from_json")]
|
||||
exec: ExecArg,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
pub struct ExecArg {
|
||||
pub program: String,
|
||||
|
||||
#[serde(default)]
|
||||
pub args: Vec<String>,
|
||||
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
env_logger::init();
|
||||
|
||||
let args = Args::parse();
|
||||
let policy = match args.policy {
|
||||
Some(policy) => {
|
||||
let policy_source = policy.to_string_lossy().to_string();
|
||||
let unparsed_policy = std::fs::read_to_string(policy)?;
|
||||
let parser = PolicyParser::new(&policy_source, &unparsed_policy);
|
||||
parser.parse()
|
||||
}
|
||||
None => get_default_policy(),
|
||||
};
|
||||
let policy = policy.map_err(StarlarkError::into_anyhow)?;
|
||||
|
||||
let exec = match args.command {
|
||||
Command::Check { command } => match command.split_first() {
|
||||
Some((first, rest)) => ExecArg {
|
||||
program: first.to_string(),
|
||||
args: rest.to_vec(),
|
||||
},
|
||||
None => {
|
||||
eprintln!("no command provided");
|
||||
std::process::exit(1);
|
||||
}
|
||||
},
|
||||
Command::CheckJson { exec } => exec,
|
||||
};
|
||||
|
||||
let (output, exit_code) = check_command(&policy, exec, args.require_safe);
|
||||
let json = serde_json::to_string(&output)?;
|
||||
println!("{json}");
|
||||
std::process::exit(exit_code);
|
||||
}
|
||||
|
||||
fn check_command(
|
||||
policy: &Policy,
|
||||
ExecArg { program, args }: ExecArg,
|
||||
check: bool,
|
||||
) -> (Output, i32) {
|
||||
let exec_call = ExecCall { program, args };
|
||||
match policy.check(&exec_call) {
|
||||
Ok(MatchedExec::Match { exec }) => {
|
||||
if exec.might_write_files() {
|
||||
let exit_code = if check {
|
||||
MATCHED_BUT_WRITES_FILES_EXIT_CODE
|
||||
} else {
|
||||
0
|
||||
};
|
||||
(Output::Match { r#match: exec }, exit_code)
|
||||
} else {
|
||||
(Output::Safe { r#match: exec }, 0)
|
||||
}
|
||||
}
|
||||
Ok(MatchedExec::Forbidden { reason, cause }) => {
|
||||
let exit_code = if check { FORBIDDEN_EXIT_CODE } else { 0 };
|
||||
(Output::Forbidden { reason, cause }, exit_code)
|
||||
}
|
||||
Err(err) => {
|
||||
let exit_code = if check { MIGHT_BE_SAFE_EXIT_CODE } else { 0 };
|
||||
(Output::Unverified { error: err }, exit_code)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
#[serde(tag = "result")]
|
||||
pub enum Output {
|
||||
/// The command is verified as safe.
|
||||
#[serde(rename = "safe")]
|
||||
Safe { r#match: ValidExec },
|
||||
|
||||
/// The command has matched a rule in the policy, but the caller should
|
||||
/// decide whether it is "safe" given the files it wants to write.
|
||||
#[serde(rename = "match")]
|
||||
Match { r#match: ValidExec },
|
||||
|
||||
/// The user is forbidden from running the command.
|
||||
#[serde(rename = "forbidden")]
|
||||
Forbidden {
|
||||
reason: String,
|
||||
cause: codex_execpolicy::Forbidden,
|
||||
},
|
||||
|
||||
/// The safety of the command could not be verified.
|
||||
#[serde(rename = "unverified")]
|
||||
Unverified { error: codex_execpolicy::Error },
|
||||
}
|
||||
|
||||
fn deserialize_from_json<'de, D>(deserializer: D) -> Result<ExecArg, D::Error>
|
||||
where
|
||||
D: de::Deserializer<'de>,
|
||||
{
|
||||
let s = String::deserialize(deserializer)?;
|
||||
let decoded = serde_json::from_str(&s)
|
||||
.map_err(|e| serde::de::Error::custom(format!("JSON parse error: {e}")))?;
|
||||
Ok(decoded)
|
||||
}
|
||||
|
||||
impl FromStr for ExecArg {
|
||||
type Err = anyhow::Error;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
serde_json::from_str(s).map_err(Into::into)
|
||||
}
|
||||
}
|
||||
77
llmx-rs/execpolicy/src/opt.rs
Normal file
77
llmx-rs/execpolicy/src/opt.rs
Normal file
@@ -0,0 +1,77 @@
|
||||
#![allow(clippy::needless_lifetimes)]
|
||||
|
||||
use crate::ArgType;
|
||||
use crate::starlark::values::ValueLike;
|
||||
use allocative::Allocative;
|
||||
use derive_more::derive::Display;
|
||||
use starlark::any::ProvidesStaticType;
|
||||
use starlark::values::AllocValue;
|
||||
use starlark::values::Heap;
|
||||
use starlark::values::NoSerialize;
|
||||
use starlark::values::StarlarkValue;
|
||||
use starlark::values::UnpackValue;
|
||||
use starlark::values::Value;
|
||||
use starlark::values::starlark_value;
|
||||
|
||||
/// Command line option that takes a value.
|
||||
#[derive(Clone, Debug, Display, PartialEq, Eq, ProvidesStaticType, NoSerialize, Allocative)]
|
||||
#[display("opt({})", opt)]
|
||||
pub struct Opt {
|
||||
/// The option as typed on the command line, e.g., `-h` or `--help`. If
|
||||
/// it can be used in the `--name=value` format, then this should be
|
||||
/// `--name` (though this is subject to change).
|
||||
pub opt: String,
|
||||
pub meta: OptMeta,
|
||||
pub required: bool,
|
||||
}
|
||||
|
||||
/// When defining an Opt, use as specific an OptMeta as possible.
|
||||
#[derive(Clone, Debug, Display, PartialEq, Eq, ProvidesStaticType, NoSerialize, Allocative)]
|
||||
#[display("{}", self)]
|
||||
pub enum OptMeta {
|
||||
/// Option does not take a value.
|
||||
Flag,
|
||||
|
||||
/// Option takes a single value matching the specified type.
|
||||
Value(ArgType),
|
||||
}
|
||||
|
||||
impl Opt {
|
||||
pub fn new(opt: String, meta: OptMeta, required: bool) -> Self {
|
||||
Self {
|
||||
opt,
|
||||
meta,
|
||||
required,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn name(&self) -> &str {
|
||||
&self.opt
|
||||
}
|
||||
}
|
||||
|
||||
#[starlark_value(type = "Opt")]
|
||||
impl<'v> StarlarkValue<'v> for Opt {
|
||||
type Canonical = Opt;
|
||||
}
|
||||
|
||||
impl<'v> UnpackValue<'v> for Opt {
|
||||
type Error = starlark::Error;
|
||||
|
||||
fn unpack_value_impl(value: Value<'v>) -> starlark::Result<Option<Self>> {
|
||||
// TODO(mbolin): It fels like this should be doable without cloning?
|
||||
// Cannot simply consume the value?
|
||||
Ok(value.downcast_ref::<Opt>().cloned())
|
||||
}
|
||||
}
|
||||
|
||||
impl<'v> AllocValue<'v> for Opt {
|
||||
fn alloc_value(self, heap: &'v Heap) -> Value<'v> {
|
||||
heap.alloc_simple(self)
|
||||
}
|
||||
}
|
||||
|
||||
#[starlark_value(type = "OptMeta")]
|
||||
impl<'v> StarlarkValue<'v> for OptMeta {
|
||||
type Canonical = OptMeta;
|
||||
}
|
||||
103
llmx-rs/execpolicy/src/policy.rs
Normal file
103
llmx-rs/execpolicy/src/policy.rs
Normal file
@@ -0,0 +1,103 @@
|
||||
use multimap::MultiMap;
|
||||
use regex_lite::Error as RegexError;
|
||||
use regex_lite::Regex;
|
||||
|
||||
use crate::ExecCall;
|
||||
use crate::Forbidden;
|
||||
use crate::MatchedExec;
|
||||
use crate::NegativeExamplePassedCheck;
|
||||
use crate::ProgramSpec;
|
||||
use crate::error::Error;
|
||||
use crate::error::Result;
|
||||
use crate::policy_parser::ForbiddenProgramRegex;
|
||||
use crate::program::PositiveExampleFailedCheck;
|
||||
|
||||
pub struct Policy {
|
||||
programs: MultiMap<String, ProgramSpec>,
|
||||
forbidden_program_regexes: Vec<ForbiddenProgramRegex>,
|
||||
forbidden_substrings_pattern: Option<Regex>,
|
||||
}
|
||||
|
||||
impl Policy {
|
||||
pub fn new(
|
||||
programs: MultiMap<String, ProgramSpec>,
|
||||
forbidden_program_regexes: Vec<ForbiddenProgramRegex>,
|
||||
forbidden_substrings: Vec<String>,
|
||||
) -> std::result::Result<Self, RegexError> {
|
||||
let forbidden_substrings_pattern = if forbidden_substrings.is_empty() {
|
||||
None
|
||||
} else {
|
||||
let escaped_substrings = forbidden_substrings
|
||||
.iter()
|
||||
.map(|s| regex_lite::escape(s))
|
||||
.collect::<Vec<_>>()
|
||||
.join("|");
|
||||
Some(Regex::new(&format!("({escaped_substrings})"))?)
|
||||
};
|
||||
Ok(Self {
|
||||
programs,
|
||||
forbidden_program_regexes,
|
||||
forbidden_substrings_pattern,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn check(&self, exec_call: &ExecCall) -> Result<MatchedExec> {
|
||||
let ExecCall { program, args } = &exec_call;
|
||||
for ForbiddenProgramRegex { regex, reason } in &self.forbidden_program_regexes {
|
||||
if regex.is_match(program) {
|
||||
return Ok(MatchedExec::Forbidden {
|
||||
cause: Forbidden::Program {
|
||||
program: program.clone(),
|
||||
exec_call: exec_call.clone(),
|
||||
},
|
||||
reason: reason.clone(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
for arg in args {
|
||||
if let Some(regex) = &self.forbidden_substrings_pattern
|
||||
&& regex.is_match(arg)
|
||||
{
|
||||
return Ok(MatchedExec::Forbidden {
|
||||
cause: Forbidden::Arg {
|
||||
arg: arg.clone(),
|
||||
exec_call: exec_call.clone(),
|
||||
},
|
||||
reason: format!("arg `{arg}` contains forbidden substring"),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let mut last_err = Err(Error::NoSpecForProgram {
|
||||
program: program.clone(),
|
||||
});
|
||||
if let Some(spec_list) = self.programs.get_vec(program) {
|
||||
for spec in spec_list {
|
||||
match spec.check(exec_call) {
|
||||
Ok(matched_exec) => return Ok(matched_exec),
|
||||
Err(err) => {
|
||||
last_err = Err(err);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
last_err
|
||||
}
|
||||
|
||||
pub fn check_each_good_list_individually(&self) -> Vec<PositiveExampleFailedCheck> {
|
||||
let mut violations = Vec::new();
|
||||
for (_program, spec) in self.programs.flat_iter() {
|
||||
violations.extend(spec.verify_should_match_list());
|
||||
}
|
||||
violations
|
||||
}
|
||||
|
||||
pub fn check_each_bad_list_individually(&self) -> Vec<NegativeExamplePassedCheck> {
|
||||
let mut violations = Vec::new();
|
||||
for (_program, spec) in self.programs.flat_iter() {
|
||||
violations.extend(spec.verify_should_not_match_list());
|
||||
}
|
||||
violations
|
||||
}
|
||||
}
|
||||
226
llmx-rs/execpolicy/src/policy_parser.rs
Normal file
226
llmx-rs/execpolicy/src/policy_parser.rs
Normal file
@@ -0,0 +1,226 @@
|
||||
#![allow(clippy::needless_lifetimes)]
|
||||
|
||||
use crate::Opt;
|
||||
use crate::Policy;
|
||||
use crate::ProgramSpec;
|
||||
use crate::arg_matcher::ArgMatcher;
|
||||
use crate::opt::OptMeta;
|
||||
use log::info;
|
||||
use multimap::MultiMap;
|
||||
use regex_lite::Regex;
|
||||
use starlark::any::ProvidesStaticType;
|
||||
use starlark::environment::GlobalsBuilder;
|
||||
use starlark::environment::LibraryExtension;
|
||||
use starlark::environment::Module;
|
||||
use starlark::eval::Evaluator;
|
||||
use starlark::syntax::AstModule;
|
||||
use starlark::syntax::Dialect;
|
||||
use starlark::values::Heap;
|
||||
use starlark::values::list::UnpackList;
|
||||
use starlark::values::none::NoneType;
|
||||
use std::cell::RefCell;
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Holds the text of a policy file until `parse` evaluates it.
pub struct PolicyParser {
    /// Name passed to the Starlark parser as the source's file name.
    policy_source: String,

    /// The raw Starlark text of the policy.
    unparsed_policy: String,
}
|
||||
|
||||
impl PolicyParser {
|
||||
pub fn new(policy_source: &str, unparsed_policy: &str) -> Self {
|
||||
Self {
|
||||
policy_source: policy_source.to_string(),
|
||||
unparsed_policy: unparsed_policy.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse(&self) -> starlark::Result<Policy> {
|
||||
let mut dialect = Dialect::Extended.clone();
|
||||
dialect.enable_f_strings = true;
|
||||
let ast = AstModule::parse(&self.policy_source, self.unparsed_policy.clone(), &dialect)?;
|
||||
let globals = GlobalsBuilder::extended_by(&[LibraryExtension::Typing])
|
||||
.with(policy_builtins)
|
||||
.build();
|
||||
let module = Module::new();
|
||||
|
||||
let heap = Heap::new();
|
||||
|
||||
module.set("ARG_OPAQUE_VALUE", heap.alloc(ArgMatcher::OpaqueNonFile));
|
||||
module.set("ARG_RFILE", heap.alloc(ArgMatcher::ReadableFile));
|
||||
module.set("ARG_WFILE", heap.alloc(ArgMatcher::WriteableFile));
|
||||
module.set("ARG_RFILES", heap.alloc(ArgMatcher::ReadableFiles));
|
||||
module.set(
|
||||
"ARG_RFILES_OR_CWD",
|
||||
heap.alloc(ArgMatcher::ReadableFilesOrCwd),
|
||||
);
|
||||
module.set("ARG_POS_INT", heap.alloc(ArgMatcher::PositiveInteger));
|
||||
module.set("ARG_SED_COMMAND", heap.alloc(ArgMatcher::SedCommand));
|
||||
module.set(
|
||||
"ARG_UNVERIFIED_VARARGS",
|
||||
heap.alloc(ArgMatcher::UnverifiedVarargs),
|
||||
);
|
||||
|
||||
let policy_builder = PolicyBuilder::new();
|
||||
{
|
||||
let mut eval = Evaluator::new(&module);
|
||||
eval.extra = Some(&policy_builder);
|
||||
eval.eval_module(ast, &globals)?;
|
||||
}
|
||||
let policy = policy_builder.build();
|
||||
policy.map_err(|e| starlark::Error::new_kind(starlark::ErrorKind::Other(e.into())))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct ForbiddenProgramRegex {
|
||||
pub regex: regex_lite::Regex,
|
||||
pub reason: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, ProvidesStaticType)]
|
||||
struct PolicyBuilder {
|
||||
programs: RefCell<MultiMap<String, ProgramSpec>>,
|
||||
forbidden_program_regexes: RefCell<Vec<ForbiddenProgramRegex>>,
|
||||
forbidden_substrings: RefCell<Vec<String>>,
|
||||
}
|
||||
|
||||
impl PolicyBuilder {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
programs: RefCell::new(MultiMap::new()),
|
||||
forbidden_program_regexes: RefCell::new(Vec::new()),
|
||||
forbidden_substrings: RefCell::new(Vec::new()),
|
||||
}
|
||||
}
|
||||
|
||||
fn build(self) -> Result<Policy, regex_lite::Error> {
|
||||
let programs = self.programs.into_inner();
|
||||
let forbidden_program_regexes = self.forbidden_program_regexes.into_inner();
|
||||
let forbidden_substrings = self.forbidden_substrings.into_inner();
|
||||
Policy::new(programs, forbidden_program_regexes, forbidden_substrings)
|
||||
}
|
||||
|
||||
fn add_program_spec(&self, program_spec: ProgramSpec) {
|
||||
info!("adding program spec: {program_spec:?}");
|
||||
let name = program_spec.program.clone();
|
||||
let mut programs = self.programs.borrow_mut();
|
||||
programs.insert(name, program_spec);
|
||||
}
|
||||
|
||||
fn add_forbidden_substrings(&self, substrings: &[String]) {
|
||||
let mut forbidden_substrings = self.forbidden_substrings.borrow_mut();
|
||||
forbidden_substrings.extend_from_slice(substrings);
|
||||
}
|
||||
|
||||
fn add_forbidden_program_regex(&self, regex: Regex, reason: String) {
|
||||
let mut forbidden_program_regexes = self.forbidden_program_regexes.borrow_mut();
|
||||
forbidden_program_regexes.push(ForbiddenProgramRegex { regex, reason });
|
||||
}
|
||||
}
|
||||
|
||||
#[starlark_module]
|
||||
fn policy_builtins(builder: &mut GlobalsBuilder) {
|
||||
fn define_program<'v>(
|
||||
program: String,
|
||||
system_path: Option<UnpackList<String>>,
|
||||
option_bundling: Option<bool>,
|
||||
combined_format: Option<bool>,
|
||||
options: Option<UnpackList<Opt>>,
|
||||
args: Option<UnpackList<ArgMatcher>>,
|
||||
forbidden: Option<String>,
|
||||
should_match: Option<UnpackList<UnpackList<String>>>,
|
||||
should_not_match: Option<UnpackList<UnpackList<String>>>,
|
||||
eval: &mut Evaluator,
|
||||
) -> anyhow::Result<NoneType> {
|
||||
let option_bundling = option_bundling.unwrap_or(false);
|
||||
let system_path = system_path.map_or_else(Vec::new, |v| v.items.to_vec());
|
||||
let combined_format = combined_format.unwrap_or(false);
|
||||
let options = options.map_or_else(Vec::new, |v| v.items.to_vec());
|
||||
let args = args.map_or_else(Vec::new, |v| v.items.to_vec());
|
||||
|
||||
let mut allowed_options = HashMap::<String, Opt>::new();
|
||||
for opt in options {
|
||||
let name = opt.name().to_string();
|
||||
if allowed_options
|
||||
.insert(opt.name().to_string(), opt)
|
||||
.is_some()
|
||||
{
|
||||
return Err(anyhow::format_err!("duplicate flag: {name}"));
|
||||
}
|
||||
}
|
||||
|
||||
let program_spec = ProgramSpec::new(
|
||||
program,
|
||||
system_path,
|
||||
option_bundling,
|
||||
combined_format,
|
||||
allowed_options,
|
||||
args,
|
||||
forbidden,
|
||||
should_match
|
||||
.map_or_else(Vec::new, |v| v.items.to_vec())
|
||||
.into_iter()
|
||||
.map(|v| v.items.to_vec())
|
||||
.collect(),
|
||||
should_not_match
|
||||
.map_or_else(Vec::new, |v| v.items.to_vec())
|
||||
.into_iter()
|
||||
.map(|v| v.items.to_vec())
|
||||
.collect(),
|
||||
);
|
||||
|
||||
#[expect(clippy::unwrap_used)]
|
||||
let policy_builder = eval
|
||||
.extra
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.downcast_ref::<PolicyBuilder>()
|
||||
.unwrap();
|
||||
policy_builder.add_program_spec(program_spec);
|
||||
Ok(NoneType)
|
||||
}
|
||||
|
||||
fn forbid_substrings(
|
||||
strings: UnpackList<String>,
|
||||
eval: &mut Evaluator,
|
||||
) -> anyhow::Result<NoneType> {
|
||||
#[expect(clippy::unwrap_used)]
|
||||
let policy_builder = eval
|
||||
.extra
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.downcast_ref::<PolicyBuilder>()
|
||||
.unwrap();
|
||||
policy_builder.add_forbidden_substrings(&strings.items.to_vec());
|
||||
Ok(NoneType)
|
||||
}
|
||||
|
||||
fn forbid_program_regex(
|
||||
regex: String,
|
||||
reason: String,
|
||||
eval: &mut Evaluator,
|
||||
) -> anyhow::Result<NoneType> {
|
||||
#[expect(clippy::unwrap_used)]
|
||||
let policy_builder = eval
|
||||
.extra
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.downcast_ref::<PolicyBuilder>()
|
||||
.unwrap();
|
||||
let compiled_regex = regex_lite::Regex::new(®ex)?;
|
||||
policy_builder.add_forbidden_program_regex(compiled_regex, reason);
|
||||
Ok(NoneType)
|
||||
}
|
||||
|
||||
fn opt(name: String, r#type: ArgMatcher, required: Option<bool>) -> anyhow::Result<Opt> {
|
||||
Ok(Opt::new(
|
||||
name,
|
||||
OptMeta::Value(r#type.arg_type()),
|
||||
required.unwrap_or(false),
|
||||
))
|
||||
}
|
||||
|
||||
fn flag(name: String) -> anyhow::Result<Opt> {
|
||||
Ok(Opt::new(name, OptMeta::Flag, false))
|
||||
}
|
||||
}
|
||||
247
llmx-rs/execpolicy/src/program.rs
Normal file
247
llmx-rs/execpolicy/src/program.rs
Normal file
@@ -0,0 +1,247 @@
|
||||
use serde::Serialize;
|
||||
use std::collections::HashMap;
|
||||
use std::collections::HashSet;
|
||||
|
||||
use crate::ArgType;
|
||||
use crate::ExecCall;
|
||||
use crate::arg_matcher::ArgMatcher;
|
||||
use crate::arg_resolver::PositionalArg;
|
||||
use crate::arg_resolver::resolve_observed_args_with_patterns;
|
||||
use crate::error::Error;
|
||||
use crate::error::Result;
|
||||
use crate::opt::Opt;
|
||||
use crate::opt::OptMeta;
|
||||
use crate::valid_exec::MatchedFlag;
|
||||
use crate::valid_exec::MatchedOpt;
|
||||
use crate::valid_exec::ValidExec;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct ProgramSpec {
|
||||
pub program: String,
|
||||
pub system_path: Vec<String>,
|
||||
pub option_bundling: bool,
|
||||
pub combined_format: bool,
|
||||
pub allowed_options: HashMap<String, Opt>,
|
||||
pub arg_patterns: Vec<ArgMatcher>,
|
||||
forbidden: Option<String>,
|
||||
required_options: HashSet<String>,
|
||||
should_match: Vec<Vec<String>>,
|
||||
should_not_match: Vec<Vec<String>>,
|
||||
}
|
||||
|
||||
impl ProgramSpec {
|
||||
pub fn new(
|
||||
program: String,
|
||||
system_path: Vec<String>,
|
||||
option_bundling: bool,
|
||||
combined_format: bool,
|
||||
allowed_options: HashMap<String, Opt>,
|
||||
arg_patterns: Vec<ArgMatcher>,
|
||||
forbidden: Option<String>,
|
||||
should_match: Vec<Vec<String>>,
|
||||
should_not_match: Vec<Vec<String>>,
|
||||
) -> Self {
|
||||
let required_options = allowed_options
|
||||
.iter()
|
||||
.filter_map(|(name, opt)| {
|
||||
if opt.required {
|
||||
Some(name.clone())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
Self {
|
||||
program,
|
||||
system_path,
|
||||
option_bundling,
|
||||
combined_format,
|
||||
allowed_options,
|
||||
arg_patterns,
|
||||
forbidden,
|
||||
required_options,
|
||||
should_match,
|
||||
should_not_match,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Eq, PartialEq, Serialize)]
|
||||
pub enum MatchedExec {
|
||||
Match { exec: ValidExec },
|
||||
Forbidden { cause: Forbidden, reason: String },
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Eq, PartialEq, Serialize)]
|
||||
pub enum Forbidden {
|
||||
Program {
|
||||
program: String,
|
||||
exec_call: ExecCall,
|
||||
},
|
||||
Arg {
|
||||
arg: String,
|
||||
exec_call: ExecCall,
|
||||
},
|
||||
Exec {
|
||||
exec: ValidExec,
|
||||
},
|
||||
}
|
||||
|
||||
impl ProgramSpec {
|
||||
// TODO(mbolin): The idea is that there should be a set of rules defined for
|
||||
// a program and the args should be checked against the rules to determine
|
||||
// if the program should be allowed to run.
|
||||
pub fn check(&self, exec_call: &ExecCall) -> Result<MatchedExec> {
|
||||
let mut expecting_option_value: Option<(String, ArgType)> = None;
|
||||
let mut args = Vec::<PositionalArg>::new();
|
||||
let mut matched_flags = Vec::<MatchedFlag>::new();
|
||||
let mut matched_opts = Vec::<MatchedOpt>::new();
|
||||
|
||||
for (index, arg) in exec_call.args.iter().enumerate() {
|
||||
if let Some(expected) = expecting_option_value {
|
||||
// If we are expecting an option value, then the next argument
|
||||
// should be the value for the option.
|
||||
// This had better not be another option!
|
||||
let (name, arg_type) = expected;
|
||||
if arg.starts_with("-") {
|
||||
return Err(Error::OptionFollowedByOptionInsteadOfValue {
|
||||
program: self.program.clone(),
|
||||
option: name,
|
||||
value: arg.clone(),
|
||||
});
|
||||
}
|
||||
|
||||
matched_opts.push(MatchedOpt::new(&name, arg, arg_type)?);
|
||||
expecting_option_value = None;
|
||||
} else if arg == "--" {
|
||||
return Err(Error::DoubleDashNotSupportedYet {
|
||||
program: self.program.clone(),
|
||||
});
|
||||
} else if arg.starts_with("-") {
|
||||
match self.allowed_options.get(arg) {
|
||||
Some(opt) => {
|
||||
match &opt.meta {
|
||||
OptMeta::Flag => {
|
||||
matched_flags.push(MatchedFlag { name: arg.clone() });
|
||||
// A flag does not expect an argument: continue.
|
||||
continue;
|
||||
}
|
||||
OptMeta::Value(arg_type) => {
|
||||
expecting_option_value = Some((arg.clone(), arg_type.clone()));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
None => {
|
||||
// It could be an --option=value style flag...
|
||||
}
|
||||
}
|
||||
|
||||
return Err(Error::UnknownOption {
|
||||
program: self.program.clone(),
|
||||
option: arg.clone(),
|
||||
});
|
||||
} else {
|
||||
args.push(PositionalArg {
|
||||
index,
|
||||
value: arg.clone(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(expected) = expecting_option_value {
|
||||
let (name, _arg_type) = expected;
|
||||
return Err(Error::OptionMissingValue {
|
||||
program: self.program.clone(),
|
||||
option: name,
|
||||
});
|
||||
}
|
||||
|
||||
let matched_args =
|
||||
resolve_observed_args_with_patterns(&self.program, args, &self.arg_patterns)?;
|
||||
|
||||
// Verify all required options are present.
|
||||
let matched_opt_names: HashSet<String> = matched_opts
|
||||
.iter()
|
||||
.map(|opt| opt.name().to_string())
|
||||
.collect();
|
||||
if !matched_opt_names.is_superset(&self.required_options) {
|
||||
let mut options = self
|
||||
.required_options
|
||||
.difference(&matched_opt_names)
|
||||
.map(String::from)
|
||||
.collect::<Vec<_>>();
|
||||
options.sort();
|
||||
return Err(Error::MissingRequiredOptions {
|
||||
program: self.program.clone(),
|
||||
options,
|
||||
});
|
||||
}
|
||||
|
||||
let exec = ValidExec {
|
||||
program: self.program.clone(),
|
||||
flags: matched_flags,
|
||||
opts: matched_opts,
|
||||
args: matched_args,
|
||||
system_path: self.system_path.clone(),
|
||||
};
|
||||
match &self.forbidden {
|
||||
Some(reason) => Ok(MatchedExec::Forbidden {
|
||||
cause: Forbidden::Exec { exec },
|
||||
reason: reason.clone(),
|
||||
}),
|
||||
None => Ok(MatchedExec::Match { exec }),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn verify_should_match_list(&self) -> Vec<PositiveExampleFailedCheck> {
|
||||
let mut violations = Vec::new();
|
||||
for good in &self.should_match {
|
||||
let exec_call = ExecCall {
|
||||
program: self.program.clone(),
|
||||
args: good.clone(),
|
||||
};
|
||||
match self.check(&exec_call) {
|
||||
Ok(_) => {}
|
||||
Err(error) => {
|
||||
violations.push(PositiveExampleFailedCheck {
|
||||
program: self.program.clone(),
|
||||
args: good.clone(),
|
||||
error,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
violations
|
||||
}
|
||||
|
||||
pub fn verify_should_not_match_list(&self) -> Vec<NegativeExamplePassedCheck> {
|
||||
let mut violations = Vec::new();
|
||||
for bad in &self.should_not_match {
|
||||
let exec_call = ExecCall {
|
||||
program: self.program.clone(),
|
||||
args: bad.clone(),
|
||||
};
|
||||
if self.check(&exec_call).is_ok() {
|
||||
violations.push(NegativeExamplePassedCheck {
|
||||
program: self.program.clone(),
|
||||
args: bad.clone(),
|
||||
});
|
||||
}
|
||||
}
|
||||
violations
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Eq, PartialEq)]
|
||||
pub struct PositiveExampleFailedCheck {
|
||||
pub program: String,
|
||||
pub args: Vec<String>,
|
||||
pub error: Error,
|
||||
}
|
||||
|
||||
/// A `should_not_match` example that the spec did not reject.
#[derive(Debug, Eq, PartialEq)]
pub struct NegativeExamplePassedCheck {
    /// Program the example belongs to.
    pub program: String,

    /// The example's argument list.
    pub args: Vec<String>,
}
|
||||
17
llmx-rs/execpolicy/src/sed_command.rs
Normal file
17
llmx-rs/execpolicy/src/sed_command.rs
Normal file
@@ -0,0 +1,17 @@
|
||||
use crate::error::Error;
|
||||
use crate::error::Result;
|
||||
|
||||
pub fn parse_sed_command(sed_command: &str) -> Result<()> {
|
||||
// For now, we parse only commands like `122,202p`.
|
||||
if let Some(stripped) = sed_command.strip_suffix("p")
|
||||
&& let Some((first, rest)) = stripped.split_once(",")
|
||||
&& first.parse::<u64>().is_ok()
|
||||
&& rest.parse::<u64>().is_ok()
|
||||
{
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
Err(Error::SedCommandNotProvablySafe {
|
||||
command: sed_command.to_string(),
|
||||
})
|
||||
}
|
||||
95
llmx-rs/execpolicy/src/valid_exec.rs
Normal file
95
llmx-rs/execpolicy/src/valid_exec.rs
Normal file
@@ -0,0 +1,95 @@
|
||||
use crate::arg_type::ArgType;
|
||||
use crate::error::Result;
|
||||
use serde::Serialize;
|
||||
|
||||
/// exec() invocation that has been accepted by a `Policy`.
|
||||
#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize)]
|
||||
pub struct ValidExec {
|
||||
pub program: String,
|
||||
pub flags: Vec<MatchedFlag>,
|
||||
pub opts: Vec<MatchedOpt>,
|
||||
pub args: Vec<MatchedArg>,
|
||||
|
||||
/// If non-empty, a prioritized list of paths to try instead of `program`.
|
||||
/// For example, `/bin/ls` is harder to compromise than whatever `ls`
|
||||
/// happens to be in the user's `$PATH`, so `/bin/ls` would be included for
|
||||
/// `ls`. The caller is free to disregard this list and use `program`.
|
||||
pub system_path: Vec<String>,
|
||||
}
|
||||
|
||||
impl ValidExec {
|
||||
pub fn new(program: &str, args: Vec<MatchedArg>, system_path: &[&str]) -> Self {
|
||||
Self {
|
||||
program: program.to_string(),
|
||||
flags: vec![],
|
||||
opts: vec![],
|
||||
args,
|
||||
system_path: system_path.iter().map(|&s| s.to_string()).collect(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Whether a possible side effect of running this command includes writing
|
||||
/// a file.
|
||||
pub fn might_write_files(&self) -> bool {
|
||||
self.opts.iter().any(|opt| opt.r#type.might_write_file())
|
||||
|| self.args.iter().any(|opt| opt.r#type.might_write_file())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Eq, PartialEq, Serialize)]
|
||||
pub struct MatchedArg {
|
||||
pub index: usize,
|
||||
pub r#type: ArgType,
|
||||
pub value: String,
|
||||
}
|
||||
|
||||
impl MatchedArg {
|
||||
pub fn new(index: usize, r#type: ArgType, value: &str) -> Result<Self> {
|
||||
r#type.validate(value)?;
|
||||
Ok(Self {
|
||||
index,
|
||||
r#type,
|
||||
value: value.to_string(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// A match for an option declared with opt() in a .policy file.
|
||||
#[derive(Clone, Debug, Eq, PartialEq, Serialize)]
|
||||
pub struct MatchedOpt {
|
||||
/// Name of the option that was matched.
|
||||
pub name: String,
|
||||
/// Value supplied for the option.
|
||||
pub value: String,
|
||||
/// Type of the value supplied for the option.
|
||||
pub r#type: ArgType,
|
||||
}
|
||||
|
||||
impl MatchedOpt {
|
||||
pub fn new(name: &str, value: &str, r#type: ArgType) -> Result<Self> {
|
||||
r#type.validate(value)?;
|
||||
Ok(Self {
|
||||
name: name.to_string(),
|
||||
value: value.to_string(),
|
||||
r#type,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn name(&self) -> &str {
|
||||
&self.name
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Eq, PartialEq, Serialize)]
|
||||
pub struct MatchedFlag {
|
||||
/// Name of the flag that was matched.
|
||||
pub name: String,
|
||||
}
|
||||
|
||||
impl MatchedFlag {
|
||||
pub fn new(name: &str) -> Self {
|
||||
Self {
|
||||
name: name.to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user