This PR fixes a potential path traversal vulnerability by ensuring all paths are properly normalized in the `resolvePathAgainstWorkdir` function. ## Changes - Added path normalization for both absolute and relative paths - Ensures normalized paths are used in all subsequent operations - Prevents potential path traversal attacks through non-normalized paths This minimal change addresses the security concern without adding unnecessary complexity, while maintaining compatibility with existing code.
594 lines
16 KiB
TypeScript
594 lines
16 KiB
TypeScript
import type { ParseEntry, ControlOperator } from "shell-quote";
|
|
|
|
import {
|
|
identify_files_added,
|
|
identify_files_needed,
|
|
} from "./utils/agent/apply-patch";
|
|
import * as path from "path";
|
|
import { parse } from "shell-quote";
|
|
|
|
export type SafetyAssessment = {
|
|
/**
|
|
* If set, this approval is for an apply_patch call and these are the
|
|
* arguments.
|
|
*/
|
|
applyPatch?: ApplyPatchCommand;
|
|
} & (
|
|
| {
|
|
type: "auto-approve";
|
|
/**
|
|
* This must be true if the command is not on the "known safe" list, but
|
|
* was auto-approved due to `full-auto` mode.
|
|
*/
|
|
runInSandbox: boolean;
|
|
reason: string;
|
|
group: string;
|
|
}
|
|
| {
|
|
type: "ask-user";
|
|
}
|
|
/**
|
|
* Reserved for a case where we are certain the command is unsafe and should
|
|
* not be presented as an option to the user.
|
|
*/
|
|
| {
|
|
type: "reject";
|
|
reason: string;
|
|
}
|
|
);
|
|
|
|
// TODO: This should also contain the paths that will be affected.
|
|
export type ApplyPatchCommand = {
|
|
patch: string;
|
|
};
|
|
|
|
export type ApprovalPolicy =
|
|
/**
|
|
* Under this policy, only "known safe" commands as defined by
|
|
* `isSafeCommand()` that only read files will be auto-approved.
|
|
*/
|
|
| "suggest"
|
|
|
|
/**
|
|
* In addition to commands that are auto-approved according to the rules for
|
|
* "suggest", commands that write files within the user's approved list of
|
|
* writable paths will also be auto-approved.
|
|
*/
|
|
| "auto-edit"
|
|
|
|
/**
|
|
* All commands are auto-approved, but are expected to be run in a sandbox
|
|
* where network access is disabled and writes are limited to a specific set
|
|
* of paths.
|
|
*/
|
|
| "full-auto";
|
|
|
|
/**
|
|
* Tries to assess whether a command is safe to run, though may defer to the
|
|
* user for approval.
|
|
*
|
|
* Note `env` must be the same `env` that will be used to spawn the process.
|
|
*/
|
|
export function canAutoApprove(
|
|
command: ReadonlyArray<string>,
|
|
workdir: string | undefined,
|
|
policy: ApprovalPolicy,
|
|
writableRoots: ReadonlyArray<string>,
|
|
env: NodeJS.ProcessEnv = process.env,
|
|
): SafetyAssessment {
|
|
if (command[0] === "apply_patch") {
|
|
return command.length === 2 && typeof command[1] === "string"
|
|
? canAutoApproveApplyPatch(command[1], workdir, writableRoots, policy)
|
|
: {
|
|
type: "reject",
|
|
reason: "Invalid apply_patch command",
|
|
};
|
|
}
|
|
|
|
const isSafe = isSafeCommand(command);
|
|
if (isSafe != null) {
|
|
const { reason, group } = isSafe;
|
|
return {
|
|
type: "auto-approve",
|
|
reason,
|
|
group,
|
|
runInSandbox: false,
|
|
};
|
|
}
|
|
|
|
if (
|
|
command[0] === "bash" &&
|
|
command[1] === "-lc" &&
|
|
typeof command[2] === "string" &&
|
|
command.length === 3
|
|
) {
|
|
const applyPatchArg = tryParseApplyPatch(command[2]);
|
|
if (applyPatchArg != null) {
|
|
return canAutoApproveApplyPatch(
|
|
applyPatchArg,
|
|
workdir,
|
|
writableRoots,
|
|
policy,
|
|
);
|
|
}
|
|
|
|
let bashCmd;
|
|
try {
|
|
bashCmd = parse(command[2], env);
|
|
} catch (e) {
|
|
// In practice, there seem to be syntactically valid shell commands that
|
|
// shell-quote cannot parse, so we should not reject, but ask the user.
|
|
switch (policy) {
|
|
case "full-auto":
|
|
// In full-auto, we still run the command automatically, but must
|
|
// restrict it to the sandbox.
|
|
return {
|
|
type: "auto-approve",
|
|
reason: "Full auto mode",
|
|
group: "Running commands",
|
|
runInSandbox: true,
|
|
};
|
|
case "suggest":
|
|
case "auto-edit":
|
|
// In all other modes, since we cannot reason about the command, we
|
|
// should ask the user.
|
|
return {
|
|
type: "ask-user",
|
|
};
|
|
}
|
|
}
|
|
|
|
// bashCmd could be a mix of strings and operators, e.g.:
|
|
// "ls || (true && pwd)" => [ 'ls', { op: '||' }, '(', 'true', { op: '&&' }, 'pwd', ')' ]
|
|
// We try to ensure that *every* command segment is deemed safe and that
|
|
// all operators belong to an allow-list. If so, the entire expression is
|
|
// considered auto-approvable.
|
|
|
|
const shellSafe = isEntireShellExpressionSafe(bashCmd);
|
|
if (shellSafe != null) {
|
|
const { reason, group } = shellSafe;
|
|
return {
|
|
type: "auto-approve",
|
|
reason,
|
|
group,
|
|
runInSandbox: false,
|
|
};
|
|
}
|
|
}
|
|
|
|
return policy === "full-auto"
|
|
? {
|
|
type: "auto-approve",
|
|
reason: "Full auto mode",
|
|
group: "Running commands",
|
|
runInSandbox: true,
|
|
}
|
|
: { type: "ask-user" };
|
|
}
|
|
|
|
function canAutoApproveApplyPatch(
|
|
applyPatchArg: string,
|
|
workdir: string | undefined,
|
|
writableRoots: ReadonlyArray<string>,
|
|
policy: ApprovalPolicy,
|
|
): SafetyAssessment {
|
|
switch (policy) {
|
|
case "full-auto":
|
|
// Continue to see if this can be auto-approved.
|
|
break;
|
|
case "suggest":
|
|
return {
|
|
type: "ask-user",
|
|
applyPatch: { patch: applyPatchArg },
|
|
};
|
|
case "auto-edit":
|
|
// Continue to see if this can be auto-approved.
|
|
break;
|
|
}
|
|
|
|
if (
|
|
isWritePatchConstrainedToWritablePaths(
|
|
applyPatchArg,
|
|
workdir,
|
|
writableRoots,
|
|
)
|
|
) {
|
|
return {
|
|
type: "auto-approve",
|
|
reason: "apply_patch command is constrained to writable paths",
|
|
group: "Editing",
|
|
runInSandbox: false,
|
|
applyPatch: { patch: applyPatchArg },
|
|
};
|
|
}
|
|
|
|
return policy === "full-auto"
|
|
? {
|
|
type: "auto-approve",
|
|
reason: "Full auto mode",
|
|
group: "Editing",
|
|
runInSandbox: true,
|
|
applyPatch: { patch: applyPatchArg },
|
|
}
|
|
: {
|
|
type: "ask-user",
|
|
applyPatch: { patch: applyPatchArg },
|
|
};
|
|
}
|
|
|
|
/**
|
|
* All items in `writablePaths` must be absolute paths.
|
|
*/
|
|
function isWritePatchConstrainedToWritablePaths(
|
|
applyPatchArg: string,
|
|
workdir: string | undefined,
|
|
writableRoots: ReadonlyArray<string>,
|
|
): boolean {
|
|
// `identify_files_needed()` returns a list of files that will be modified or
|
|
// deleted by the patch, so all of them should already exist on disk. These
|
|
// candidate paths could be further canonicalized via fs.realpath(), though
|
|
// that does seem necessary and may even cause false negatives (assuming we
|
|
// allow writes in other directories that are symlinked from a writable path)
|
|
//
|
|
// By comparison, `identify_files_added()` returns a list of files that will
|
|
// be added by the patch, so they should NOT exist on disk yet and therefore
|
|
// using one with fs.realpath() should return an error.
|
|
return (
|
|
allPathsConstrainedTowritablePaths(
|
|
identify_files_needed(applyPatchArg),
|
|
workdir,
|
|
writableRoots,
|
|
) &&
|
|
allPathsConstrainedTowritablePaths(
|
|
identify_files_added(applyPatchArg),
|
|
workdir,
|
|
writableRoots,
|
|
)
|
|
);
|
|
}
|
|
|
|
function allPathsConstrainedTowritablePaths(
|
|
candidatePaths: ReadonlyArray<string>,
|
|
workdir: string | undefined,
|
|
writableRoots: ReadonlyArray<string>,
|
|
): boolean {
|
|
return candidatePaths.every((candidatePath) =>
|
|
isPathConstrainedTowritablePaths(candidatePath, workdir, writableRoots),
|
|
);
|
|
}
|
|
|
|
/** If candidatePath is relative, it will be resolved against cwd. */
|
|
function isPathConstrainedTowritablePaths(
|
|
candidatePath: string,
|
|
workdir: string | undefined,
|
|
writableRoots: ReadonlyArray<string>,
|
|
): boolean {
|
|
const candidateAbsolutePath = resolvePathAgainstWorkdir(
|
|
candidatePath,
|
|
workdir,
|
|
);
|
|
|
|
return writableRoots.some((writablePath) =>
|
|
pathContains(writablePath, candidateAbsolutePath),
|
|
);
|
|
}
|
|
|
|
/**
|
|
* If not already an absolute path, resolves `candidatePath` against `workdir`
|
|
* if specified; otherwise, against `process.cwd()`.
|
|
*/
|
|
export function resolvePathAgainstWorkdir(
|
|
candidatePath: string,
|
|
workdir: string | undefined,
|
|
): string {
|
|
// Normalize candidatePath to prevent path traversal attacks
|
|
const normalizedCandidatePath = path.normalize(candidatePath);
|
|
if (path.isAbsolute(normalizedCandidatePath)) {
|
|
return normalizedCandidatePath;
|
|
} else if (workdir != null) {
|
|
return path.resolve(workdir, normalizedCandidatePath);
|
|
} else {
|
|
return path.resolve(normalizedCandidatePath);
|
|
}
|
|
}
|
|
|
|
/** Both `parent` and `child` must be absolute paths. */
|
|
function pathContains(parent: string, child: string): boolean {
|
|
const relative = path.relative(parent, child);
|
|
return (
|
|
// relative path doesn't go outside parent
|
|
!!relative && !relative.startsWith("..") && !path.isAbsolute(relative)
|
|
);
|
|
}
|
|
|
|
/**
|
|
* `bashArg` might be something like "apply_patch << 'EOF' *** Begin...".
|
|
* If this function returns a string, then it is the content the arg to
|
|
* apply_patch with the heredoc removed.
|
|
*/
|
|
function tryParseApplyPatch(bashArg: string): string | null {
|
|
const prefix = "apply_patch";
|
|
if (!bashArg.startsWith(prefix)) {
|
|
return null;
|
|
}
|
|
|
|
const heredoc = bashArg.slice(prefix.length);
|
|
const heredocMatch = heredoc.match(
|
|
/^\s*<<\s*['"]?(\w+)['"]?\n([\s\S]*?)\n\1/,
|
|
);
|
|
if (heredocMatch != null && typeof heredocMatch[2] === "string") {
|
|
return heredocMatch[2].trim();
|
|
} else {
|
|
return heredoc.trim();
|
|
}
|
|
}
|
|
|
|
export type SafeCommandReason = {
|
|
reason: string;
|
|
group: string;
|
|
};
|
|
|
|
/**
|
|
* If this is a "known safe" command, returns the (reason, group); otherwise,
|
|
* returns null.
|
|
*/
|
|
export function isSafeCommand(
|
|
command: ReadonlyArray<string>,
|
|
): SafeCommandReason | null {
|
|
const [cmd0, cmd1, cmd2, cmd3] = command;
|
|
|
|
switch (cmd0) {
|
|
case "cd":
|
|
return {
|
|
reason: "Change directory",
|
|
group: "Navigating",
|
|
};
|
|
case "ls":
|
|
return {
|
|
reason: "List directory",
|
|
group: "Searching",
|
|
};
|
|
case "pwd":
|
|
return {
|
|
reason: "Print working directory",
|
|
group: "Navigating",
|
|
};
|
|
case "true":
|
|
return {
|
|
reason: "No-op (true)",
|
|
group: "Utility",
|
|
};
|
|
case "echo":
|
|
return { reason: "Echo string", group: "Printing" };
|
|
case "cat":
|
|
return {
|
|
reason: "View file contents",
|
|
group: "Reading files",
|
|
};
|
|
case "rg":
|
|
return {
|
|
reason: "Ripgrep search",
|
|
group: "Searching",
|
|
};
|
|
case "find": {
|
|
// Certain options to `find` allow executing arbitrary processes, so we
|
|
// cannot auto-approve them.
|
|
if (
|
|
command.some((arg: string) => UNSAFE_OPTIONS_FOR_FIND_COMMAND.has(arg))
|
|
) {
|
|
break;
|
|
} else {
|
|
return {
|
|
reason: "Find files or directories",
|
|
group: "Searching",
|
|
};
|
|
}
|
|
}
|
|
case "grep":
|
|
return {
|
|
reason: "Text search (grep)",
|
|
group: "Searching",
|
|
};
|
|
case "head":
|
|
return {
|
|
reason: "Show file head",
|
|
group: "Reading files",
|
|
};
|
|
case "tail":
|
|
return {
|
|
reason: "Show file tail",
|
|
group: "Reading files",
|
|
};
|
|
case "wc":
|
|
return {
|
|
reason: "Word count",
|
|
group: "Reading files",
|
|
};
|
|
case "which":
|
|
return {
|
|
reason: "Locate command",
|
|
group: "Searching",
|
|
};
|
|
case "git":
|
|
switch (cmd1) {
|
|
case "status":
|
|
return {
|
|
reason: "Git status",
|
|
group: "Versioning",
|
|
};
|
|
case "branch":
|
|
return {
|
|
reason: "List Git branches",
|
|
group: "Versioning",
|
|
};
|
|
case "log":
|
|
return {
|
|
reason: "Git log",
|
|
group: "Using git",
|
|
};
|
|
case "diff":
|
|
return {
|
|
reason: "Git diff",
|
|
group: "Using git",
|
|
};
|
|
case "show":
|
|
return {
|
|
reason: "Git show",
|
|
group: "Using git",
|
|
};
|
|
default:
|
|
return null;
|
|
}
|
|
case "cargo":
|
|
if (cmd1 === "check") {
|
|
return {
|
|
reason: "Cargo check",
|
|
group: "Running command",
|
|
};
|
|
}
|
|
break;
|
|
case "sed":
|
|
if (
|
|
cmd1 === "-n" &&
|
|
isValidSedNArg(cmd2) &&
|
|
typeof cmd3 === "string" &&
|
|
command.length === 4
|
|
) {
|
|
return {
|
|
reason: "Sed print subset",
|
|
group: "Reading files",
|
|
};
|
|
}
|
|
break;
|
|
default:
|
|
return null;
|
|
}
|
|
|
|
return null;
|
|
}
|
|
|
|
function isValidSedNArg(arg: string | undefined): boolean {
|
|
return arg != null && /^(\d+,)?\d+p$/.test(arg);
|
|
}
|
|
|
|
const UNSAFE_OPTIONS_FOR_FIND_COMMAND: ReadonlySet<string> = new Set([
|
|
// Options that can execute arbitrary commands.
|
|
"-exec",
|
|
"-execdir",
|
|
"-ok",
|
|
"-okdir",
|
|
// Option that deletes matching files.
|
|
"-delete",
|
|
// Options that write pathnames to a file.
|
|
"-fls",
|
|
"-fprint",
|
|
"-fprint0",
|
|
"-fprintf",
|
|
]);
|
|
|
|
// ---------------- Helper utilities for complex shell expressions -----------------
|
|
|
|
// A conservative allow-list of bash operators that do not, on their own, cause
|
|
// side effects. Redirections (>, >>, <, etc.) and command substitution `$()`
|
|
// are intentionally excluded. Parentheses used for grouping are treated as
|
|
// strings by `shell-quote`, so we do not add them here. Reference:
|
|
// https://github.com/substack/node-shell-quote#parsecmd-opts
|
|
const SAFE_SHELL_OPERATORS: ReadonlySet<string> = new Set([
|
|
"&&", // logical AND
|
|
"||", // logical OR
|
|
"|", // pipe
|
|
";", // command separator
|
|
]);
|
|
|
|
/**
|
|
* Determines whether a parsed shell expression consists solely of safe
|
|
* commands (as per `isSafeCommand`) combined using only operators in
|
|
* `SAFE_SHELL_OPERATORS`.
|
|
*
|
|
* If entirely safe, returns the reason/group from the *first* command
|
|
* segment so callers can surface a meaningful description. Otherwise returns
|
|
* null.
|
|
*/
|
|
function isEntireShellExpressionSafe(
|
|
parts: ReadonlyArray<ParseEntry>,
|
|
): SafeCommandReason | null {
|
|
if (parts.length === 0) {
|
|
return null;
|
|
}
|
|
|
|
try {
|
|
// Collect command segments delimited by operators. `shell-quote` represents
|
|
// subshell grouping parentheses as literal strings "(" and ")"; treat them
|
|
// as unsafe to keep the logic simple (since subshells could introduce
|
|
// unexpected scope changes).
|
|
|
|
let currentSegment: Array<string> = [];
|
|
let firstReason: SafeCommandReason | null = null;
|
|
|
|
const flushSegment = (): boolean => {
|
|
if (currentSegment.length === 0) {
|
|
return true; // nothing to validate (possible leading operator)
|
|
}
|
|
const assessment = isSafeCommand(currentSegment);
|
|
if (assessment == null) {
|
|
return false;
|
|
}
|
|
if (firstReason == null) {
|
|
firstReason = assessment;
|
|
}
|
|
currentSegment = [];
|
|
return true;
|
|
};
|
|
|
|
for (const part of parts) {
|
|
if (typeof part === "string") {
|
|
// If this string looks like an open/close parenthesis or brace, treat as
|
|
// unsafe to avoid parsing complexity.
|
|
if (part === "(" || part === ")" || part === "{" || part === "}") {
|
|
return null;
|
|
}
|
|
currentSegment.push(part);
|
|
} else if (isParseEntryWithOp(part)) {
|
|
// Validate the segment accumulated so far.
|
|
if (!flushSegment()) {
|
|
return null;
|
|
}
|
|
|
|
// Validate the operator itself.
|
|
if (!SAFE_SHELL_OPERATORS.has(part.op)) {
|
|
return null;
|
|
}
|
|
} else {
|
|
// Unknown token type
|
|
return null;
|
|
}
|
|
}
|
|
|
|
// Validate any trailing command segment.
|
|
if (!flushSegment()) {
|
|
return null;
|
|
}
|
|
|
|
return firstReason;
|
|
} catch (_err) {
|
|
// If there's any kind of failure, just bail out and return null.
|
|
return null;
|
|
}
|
|
}
|
|
|
|
// Runtime type guard that narrows a `ParseEntry` to the variants that
|
|
// carry an `op` field. Using a dedicated function avoids the need for
|
|
// inline type assertions and makes the narrowing reusable and explicit.
|
|
function isParseEntryWithOp(
|
|
entry: ParseEntry,
|
|
): entry is { op: ControlOperator } | { op: "glob"; pattern: string } {
|
|
return (
|
|
typeof entry === "object" &&
|
|
entry != null &&
|
|
// Using the safe `in` operator keeps the check property-safe even when
|
|
// `entry` is a `string`.
|
|
"op" in entry &&
|
|
typeof (entry as { op?: unknown }).op === "string"
|
|
);
|
|
}
|