fix: run apply_patch calls through the sandbox (#1705)
Building on the work of https://github.com/openai/codex/pull/1702, this changes how a shell call to `apply_patch` is handled. Previously, a shell call to `apply_patch` was always handled in-process, never leveraging a sandbox. To determine whether the `apply_patch` operation could be auto-approved, the `is_write_patch_constrained_to_writable_paths()` function would check if all the paths listed in the paths were writable. If so, the agent would apply the changes listed in the patch. Unfortunately, this approach afforded a loophole: symlinks! * For a soft link, we could fix this issue by tracing the link and checking whether the target is in the set of writable paths, however... * ...For a hard link, things are not as simple. We can run `stat FILE` to see if the number of links is greater than 1, but then we would have to do something potentially expensive like `find . -inum <inode_number>` to find the other paths for `FILE`. Further, even if this worked, this approach runs the risk of a [TOCTOU](https://en.wikipedia.org/wiki/Time-of-check_to_time-of-use) race condition, so it is not robust. The solution, implemented in this PR, is to take the virtual execution of the `apply_patch` CLI into an _actual_ execution using `codex --codex-run-as-apply-patch PATCH`, which we can run under the sandbox the user specified, just like any other `shell` call. This, of course, assumes that the sandbox prevents writing through symlinks as a mechanism to write to folders that are not in the writable set configured by the sandbox. I verified this by testing the following on both Mac and Linux: ```shell #!/usr/bin/env bash set -euo pipefail # Can running a command in SANDBOX_DIR write a file in EXPLOIT_DIR? # Codex is run in SANDBOX_DIR, so writes should be constrianed to this directory. SANDBOX_DIR=$(mktemp -d -p "$HOME" sandboxtesttemp.XXXXXX) # EXPLOIT_DIR is outside of SANDBOX_DIR, so let's see if we can write to it. EXPLOIT_DIR=$(mktemp -d -p "$HOME" sandboxtesttemp.XXXXXX) echo "SANDBOX_DIR: $SANDBOX_DIR" echo "EXPLOIT_DIR: $EXPLOIT_DIR" cleanup() { # Only remove if it looks sane and still exists [[ -n "${SANDBOX_DIR:-}" && -d "$SANDBOX_DIR" ]] && rm -rf -- "$SANDBOX_DIR" [[ -n "${EXPLOIT_DIR:-}" && -d "$EXPLOIT_DIR" ]] && rm -rf -- "$EXPLOIT_DIR" } trap cleanup EXIT echo "I am the original content" > "${EXPLOIT_DIR}/original.txt" # Drop the -s to test hard links. ln -s "${EXPLOIT_DIR}/original.txt" "${SANDBOX_DIR}/link-to-original.txt" cat "${SANDBOX_DIR}/link-to-original.txt" if [[ "$(uname)" == "Linux" ]]; then SANDBOX_SUBCOMMAND=landlock else SANDBOX_SUBCOMMAND=seatbelt fi # Attempt the exploit cd "${SANDBOX_DIR}" codex debug "${SANDBOX_SUBCOMMAND}" bash -lc "echo pwned > ./link-to-original.txt" || true cat "${EXPLOIT_DIR}/original.txt" ``` Admittedly, this change merits a proper integration test, but I think I will have to do that in a follow-up PR.
This commit is contained in:
@@ -22,6 +22,7 @@
|
||||
//!
|
||||
//! The parser below is a little more lenient than the explicit spec and allows for
|
||||
//! leading/trailing whitespace around patch markers.
|
||||
use crate::ApplyPatchArgs;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
|
||||
@@ -102,7 +103,7 @@ pub struct UpdateFileChunk {
|
||||
pub is_end_of_file: bool,
|
||||
}
|
||||
|
||||
pub fn parse_patch(patch: &str) -> Result<Vec<Hunk>, ParseError> {
|
||||
pub fn parse_patch(patch: &str) -> Result<ApplyPatchArgs, ParseError> {
|
||||
let mode = if PARSE_IN_STRICT_MODE {
|
||||
ParseMode::Strict
|
||||
} else {
|
||||
@@ -150,7 +151,7 @@ enum ParseMode {
|
||||
Lenient,
|
||||
}
|
||||
|
||||
fn parse_patch_text(patch: &str, mode: ParseMode) -> Result<Vec<Hunk>, ParseError> {
|
||||
fn parse_patch_text(patch: &str, mode: ParseMode) -> Result<ApplyPatchArgs, ParseError> {
|
||||
let lines: Vec<&str> = patch.trim().lines().collect();
|
||||
let lines: &[&str] = match check_patch_boundaries_strict(&lines) {
|
||||
Ok(()) => &lines,
|
||||
@@ -173,7 +174,8 @@ fn parse_patch_text(patch: &str, mode: ParseMode) -> Result<Vec<Hunk>, ParseErro
|
||||
line_number += hunk_lines;
|
||||
remaining_lines = &remaining_lines[hunk_lines..]
|
||||
}
|
||||
Ok(hunks)
|
||||
let patch = lines.join("\n");
|
||||
Ok(ApplyPatchArgs { hunks, patch })
|
||||
}
|
||||
|
||||
/// Checks the start and end lines of the patch text for `apply_patch`,
|
||||
@@ -425,6 +427,7 @@ fn parse_update_file_chunk(
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[allow(clippy::unwrap_used)]
|
||||
fn test_parse_patch() {
|
||||
assert_eq!(
|
||||
parse_patch_text("bad", ParseMode::Strict),
|
||||
@@ -455,8 +458,10 @@ fn test_parse_patch() {
|
||||
"*** Begin Patch\n\
|
||||
*** End Patch",
|
||||
ParseMode::Strict
|
||||
),
|
||||
Ok(Vec::new())
|
||||
)
|
||||
.unwrap()
|
||||
.hunks,
|
||||
Vec::new()
|
||||
);
|
||||
assert_eq!(
|
||||
parse_patch_text(
|
||||
@@ -472,8 +477,10 @@ fn test_parse_patch() {
|
||||
+ return 123\n\
|
||||
*** End Patch",
|
||||
ParseMode::Strict
|
||||
),
|
||||
Ok(vec![
|
||||
)
|
||||
.unwrap()
|
||||
.hunks,
|
||||
vec![
|
||||
AddFile {
|
||||
path: PathBuf::from("path/add.py"),
|
||||
contents: "abc\ndef\n".to_string()
|
||||
@@ -491,7 +498,7 @@ fn test_parse_patch() {
|
||||
is_end_of_file: false
|
||||
}]
|
||||
}
|
||||
])
|
||||
]
|
||||
);
|
||||
// Update hunk followed by another hunk (Add File).
|
||||
assert_eq!(
|
||||
@@ -504,8 +511,10 @@ fn test_parse_patch() {
|
||||
+content\n\
|
||||
*** End Patch",
|
||||
ParseMode::Strict
|
||||
),
|
||||
Ok(vec![
|
||||
)
|
||||
.unwrap()
|
||||
.hunks,
|
||||
vec![
|
||||
UpdateFile {
|
||||
path: PathBuf::from("file.py"),
|
||||
move_path: None,
|
||||
@@ -520,7 +529,7 @@ fn test_parse_patch() {
|
||||
path: PathBuf::from("other.py"),
|
||||
contents: "content\n".to_string()
|
||||
}
|
||||
])
|
||||
]
|
||||
);
|
||||
|
||||
// Update hunk without an explicit @@ header for the first chunk should parse.
|
||||
@@ -533,8 +542,10 @@ fn test_parse_patch() {
|
||||
+bar
|
||||
*** End Patch"#,
|
||||
ParseMode::Strict
|
||||
),
|
||||
Ok(vec![UpdateFile {
|
||||
)
|
||||
.unwrap()
|
||||
.hunks,
|
||||
vec![UpdateFile {
|
||||
path: PathBuf::from("file2.py"),
|
||||
move_path: None,
|
||||
chunks: vec![UpdateFileChunk {
|
||||
@@ -543,7 +554,7 @@ fn test_parse_patch() {
|
||||
new_lines: vec!["import foo".to_string(), "bar".to_string()],
|
||||
is_end_of_file: false,
|
||||
}],
|
||||
}])
|
||||
}]
|
||||
);
|
||||
}
|
||||
|
||||
@@ -574,7 +585,10 @@ fn test_parse_patch_lenient() {
|
||||
);
|
||||
assert_eq!(
|
||||
parse_patch_text(&patch_text_in_heredoc, ParseMode::Lenient),
|
||||
Ok(expected_patch.clone())
|
||||
Ok(ApplyPatchArgs {
|
||||
hunks: expected_patch.clone(),
|
||||
patch: patch_text.to_string()
|
||||
})
|
||||
);
|
||||
|
||||
let patch_text_in_single_quoted_heredoc = format!("<<'EOF'\n{patch_text}\nEOF\n");
|
||||
@@ -584,7 +598,10 @@ fn test_parse_patch_lenient() {
|
||||
);
|
||||
assert_eq!(
|
||||
parse_patch_text(&patch_text_in_single_quoted_heredoc, ParseMode::Lenient),
|
||||
Ok(expected_patch.clone())
|
||||
Ok(ApplyPatchArgs {
|
||||
hunks: expected_patch.clone(),
|
||||
patch: patch_text.to_string()
|
||||
})
|
||||
);
|
||||
|
||||
let patch_text_in_double_quoted_heredoc = format!("<<\"EOF\"\n{patch_text}\nEOF\n");
|
||||
@@ -594,7 +611,10 @@ fn test_parse_patch_lenient() {
|
||||
);
|
||||
assert_eq!(
|
||||
parse_patch_text(&patch_text_in_double_quoted_heredoc, ParseMode::Lenient),
|
||||
Ok(expected_patch.clone())
|
||||
Ok(ApplyPatchArgs {
|
||||
hunks: expected_patch.clone(),
|
||||
patch: patch_text.to_string()
|
||||
})
|
||||
);
|
||||
|
||||
let patch_text_in_mismatched_quotes_heredoc = format!("<<\"EOF'\n{patch_text}\nEOF\n");
|
||||
|
||||
Reference in New Issue
Block a user