2025-04-16 12:56:08 -04:00
|
|
|
|
import type { CommandConfirmation } from "./agent-loop.js";
|
|
|
|
|
|
import type { AppConfig } from "../config.js";
|
|
|
|
|
|
import type { ExecInput } from "./sandbox/interface.js";
|
2025-04-16 14:16:53 -07:00
|
|
|
|
import type { ApplyPatchCommand, ApprovalPolicy } from "../../approvals.js";
|
2025-04-16 12:56:08 -04:00
|
|
|
|
import type { ResponseInputItem } from "openai/resources/responses/responses.mjs";
|
|
|
|
|
|
|
|
|
|
|
|
import { exec, execApplyPatch } from "./exec.js";
|
|
|
|
|
|
import { isLoggingEnabled, log } from "./log.js";
|
|
|
|
|
|
import { ReviewDecision } from "./review.js";
|
|
|
|
|
|
import { FullAutoErrorMode } from "../auto-approval-mode.js";
|
|
|
|
|
|
import { SandboxType } from "./sandbox/interface.js";
|
2025-04-16 14:16:53 -07:00
|
|
|
|
import { canAutoApprove } from "../../approvals.js";
|
|
|
|
|
|
import { formatCommandForDisplay } from "../../format-command.js";
|
2025-04-16 12:56:08 -04:00
|
|
|
|
import { access } from "fs/promises";
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Command keys the user has marked as "always approve".
 *
 * Entries are the generalised strings returned by `deriveCommandKey()` rather
 * than raw command lines, so one approval also covers later invocations that
 * derive to the same key. The set lives in module memory only: it is populated
 * when a review comes back as `ReviewDecision.ALWAYS` and consulted at the top
 * of `handleExecCommand()`.
 */
const alwaysApprovedCommands: Set<string> = new Set();
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Derive a stable, generalised key for an argv-style command.
 *
 * The key deliberately drops the parts of a command line that vary between
 * invocations (arguments, patch text) while keeping the *program* that is
 * being run, so that distinct programs never collapse onto the same key.
 *
 * Rules, in order:
 *  1. `apply_patch ...` (invoked directly, or as the start of a shell script)
 *     maps to `"apply_patch"`, ignoring the patch body.
 *  2. `<bash|sh|zsh> <-c|-lc> "<script>"` maps to the first word of the
 *     script, or to the shell name when the script is empty.
 *  3. Any other command maps to its program name (the first argv element).
 *  4. An empty argv (or empty program name) maps to `JSON.stringify(cmd)` so
 *     that a string is always returned.
 *
 * @param cmd Command in argv form, e.g. `["bash", "-lc", "git status"]`.
 * @returns The key used for membership checks in the "always approve" cache.
 */
function deriveCommandKey(cmd: Array<string>): string {
  const [program, flag, script] = cmd;

  // Direct invocation: the second element is the (volatile) patch text and
  // must not become part of the key.
  if (program === "apply_patch") {
    return "apply_patch";
  }

  if (
    (program === "bash" || program === "sh" || program === "zsh") &&
    (flag === "-lc" || flag === "-c")
  ) {
    // Trim first so leading whitespace cannot yield an empty first token.
    const trimmedScript = (script ?? "").trim();
    // `startsWith` (rather than comparing the first word) also catches forms
    // such as `apply_patch<<EOF` where no whitespace follows the name.
    if (trimmedScript.startsWith("apply_patch")) {
      return "apply_patch";
    }
    return trimmedScript.split(/\s+/)[0] || program;
  }

  // Every other command is keyed by its program name. Keying on a later argv
  // element would conflate unrelated programs that merely share an argument
  // (e.g. `ls -la /tmp` and `rm -rf /tmp`).
  if (program) {
    return program;
  }

  return JSON.stringify(cmd);
}
|
|
|
|
|
|
|
|
|
|
|
|
/** Outcome of `handleExecCommand()` as handed back to its caller. */
type HandleExecCommandResult = {
  /** Text reported as the command's output (or `"aborted"` on rejection). */
  outputText: string;
  /** Free-form details about the run, e.g. exit code, duration or error. */
  metadata: Record<string, unknown>;
  /** Optional extra input items; used here to carry the user's denial note. */
  additionalItems?: Array<ResponseInputItem>;
};
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Decide whether a command may run, obtain user consent when required,
 * execute it, and convert the outcome into a `HandleExecCommandResult`.
 *
 * Flow:
 *  1. If the command's derived key is in the "always approve" cache it is run
 *     immediately, unsandboxed, without consulting the approval policy.
 *  2. Otherwise `canAutoApprove()` classifies it as `ask-user`,
 *     `auto-approve` or `reject`.
 *  3. If a sandboxed run fails and `config.fullAutoErrorMode` is `ASK_USER`,
 *     the user is asked whether to re-run the command outside the sandbox.
 *
 * @param args Command to run (`cmd`) plus its execution options.
 * @param config Application config; only `fullAutoErrorMode` is read here.
 * @param policy Approval policy forwarded to `canAutoApprove()`.
 * @param additionalWritableRoots Extra writable roots forwarded to `exec()`.
 * @param getCommandConfirmation Callback used to ask the user for a decision.
 * @param abortSignal Optional signal; if it is aborted once a run finishes,
 *   an empty result is returned instead of the command output.
 * @returns The command output and metadata, an `"aborted"` result when the
 *   policy or the user rejects the command, or an empty result on abort.
 */
export async function handleExecCommand(
  args: ExecInput,
  config: AppConfig,
  policy: ApprovalPolicy,
  additionalWritableRoots: ReadonlyArray<string>,
  getCommandConfirmation: (
    command: Array<string>,
    applyPatch: ApplyPatchCommand | undefined,
  ) => Promise<CommandConfirmation>,
  abortSignal?: AbortSignal,
): Promise<HandleExecCommandResult> {
  const { cmd: command } = args;
  const key = deriveCommandKey(command);

  // 1) The user already chose "always approve" for this key: skip the policy
  //    entirely and never sandbox.
  // NOTE(review): no ApplyPatchCommand is derived on this path, so an
  // always-approved `apply_patch` is executed through `exec()` rather than
  // `execApplyPatch()` — confirm that this is intended.
  if (alwaysApprovedCommands.has(key)) {
    const approvedSummary = await execCommand(
      args,
      /* applyPatch */ undefined,
      /* runInSandbox */ false,
      additionalWritableRoots,
      abortSignal,
    );
    return convertSummaryToResult(approvedSummary);
  }

  // 2) Otherwise fall back to the normal policy. `canAutoApprove` needs the
  //    list of writable roots the command may modify; only the current
  //    working directory is passed so that edits stay within the project.
  const safety = canAutoApprove(command, policy, [process.cwd()]);

  let runInSandbox: boolean;
  switch (safety.type) {
    case "ask-user": {
      const review = await askUserPermission(
        args,
        safety.applyPatch,
        getCommandConfirmation,
      );
      if (review != null) {
        // Non-null means the user declined.
        return review;
      }
      // Explicitly approved by the user, so run without a sandbox.
      runInSandbox = false;
      break;
    }
    case "auto-approve": {
      runInSandbox = safety.runInSandbox;
      break;
    }
    case "reject": {
      return {
        outputText: "aborted",
        metadata: {
          error: "command rejected",
          reason: "Command rejected by auto-approval system.",
        },
      };
    }
  }

  const { applyPatch } = safety;
  const summary = await execCommand(
    args,
    applyPatch,
    runInSandbox,
    additionalWritableRoots,
    abortSignal,
  );

  // If the operation was aborted in the meantime, return an empty (no-op)
  // result so that the caller can exit without emitting spurious output.
  if (abortSignal?.aborted) {
    return { outputText: "", metadata: {} };
  }

  // Only a failed *sandboxed* run qualifies for a retry, and only when the
  // user opted into being asked; in every other case report the run as-is.
  const shouldOfferUnsandboxedRetry =
    summary.exitCode !== 0 &&
    runInSandbox &&
    config.fullAutoErrorMode === FullAutoErrorMode.ASK_USER;
  if (!shouldOfferUnsandboxedRetry) {
    return convertSummaryToResult(summary);
  }

  const retryReview = await askUserPermission(
    args,
    applyPatch,
    getCommandConfirmation,
  );
  if (retryReview != null) {
    return retryReview;
  }

  // The user approved the retry, so run the command outside of the sandbox.
  const retrySummary = await execCommand(
    args,
    applyPatch,
    /* runInSandbox */ false,
    additionalWritableRoots,
    abortSignal,
  );

  // Apply the same cancellation rule to the retry as to the first run.
  if (abortSignal?.aborted) {
    return { outputText: "", metadata: {} };
  }
  return convertSummaryToResult(retrySummary);
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Translate a raw execution summary into the result shape returned by
 * `handleExecCommand()`.
 *
 * `outputText` is stdout when it is non-empty and stderr otherwise. The
 * duration is reported in seconds, rounded to one decimal place.
 *
 * @param summary Captured streams, exit code and wall-clock duration (ms).
 * @returns Result carrying the output text plus `exit_code` and
 *   `duration_seconds` metadata.
 */
function convertSummaryToResult(
  summary: ExecCommandSummary,
): HandleExecCommandResult {
  // Round to tenths of a second: ms -> tenths (rounded) -> seconds.
  const durationSeconds = Math.round(summary.durationMs / 100) / 10;
  const outputText = summary.stdout || summary.stderr;

  return {
    outputText,
    metadata: {
      exit_code: summary.exitCode,
      duration_seconds: durationSeconds,
    },
  };
}
|
|
|
|
|
|
|
|
|
|
|
|
/** Raw outcome of a single `execCommand()` run. */
type ExecCommandSummary = {
  /** Captured standard output. */
  stdout: string;
  /** Captured standard error. */
  stderr: string;
  /** Exit code reported by the execution; non-zero signals failure. */
  exitCode: number;
  /** Wall-clock time spent executing, in milliseconds. */
  durationMs: number;
};
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Run a command (or apply a patch) and time it.
 *
 * When `applyPatchCommand` is provided the patch is applied through
 * `execApplyPatch()`; otherwise the command is run through `exec()` using the
 * sandbox selected by `getSandbox(runInSandbox)`.
 *
 * If `execInput.workdir` is set but not accessible, the current working
 * directory of the process is used instead, both in the log message and for
 * the actual execution.
 *
 * @param execInput Command, working directory and timeout to execute with.
 * @param applyPatchCommand Parsed patch to apply instead of running `cmd`.
 * @param runInSandbox Whether a sandbox is mandated for this run.
 * @param additionalWritableRoots Extra writable roots forwarded to `exec()`.
 * @param abortSignal Optional signal forwarded to `exec()`.
 * @returns Captured stdout/stderr, the exit code and the duration in ms.
 * @throws Error propagated from `getSandbox()` when a sandbox is mandated but
 *   none is available on this platform.
 */
async function execCommand(
  execInput: ExecInput,
  applyPatchCommand: ApplyPatchCommand | undefined,
  runInSandbox: boolean,
  additionalWritableRoots: ReadonlyArray<string>,
  abortSignal?: AbortSignal,
): Promise<ExecCommandSummary> {
  let { workdir } = execInput;
  if (workdir) {
    try {
      await access(workdir);
    } catch {
      // The requested directory is missing or inaccessible; fall back to the
      // directory the process is running in.
      log(`EXEC workdir=${workdir} not found, use process.cwd() instead`);
      workdir = process.cwd();
    }
  }

  if (isLoggingEnabled()) {
    if (applyPatchCommand != null) {
      log("EXEC running apply_patch command");
    } else {
      const { cmd, timeoutInMillis } = execInput;
      // Seconds are a bit easier to read in log messages and most timeouts
      // are specified as multiples of 1000, anyway.
      const timeout =
        timeoutInMillis != null
          ? Math.round(timeoutInMillis / 1000).toString()
          : "undefined";
      log(
        `EXEC running \`${formatCommandForDisplay(
          cmd,
        )}\` in workdir=${workdir} with timeout=${timeout}s`,
      );
    }
  }

  // Note execApplyPatch() and exec() are coded defensively and should not
  // throw. Any internal errors should be mapped to a non-zero value for the
  // exitCode field.
  const start = Date.now();
  const execResult =
    applyPatchCommand != null
      ? execApplyPatch(applyPatchCommand.patch)
      : await exec(
          // Pass the validated `workdir` explicitly; spreading `execInput`
          // alone would hand the original, possibly missing, directory to
          // exec().
          { ...execInput, workdir, additionalWritableRoots },
          await getSandbox(runInSandbox),
          abortSignal,
        );
  const duration = Date.now() - start;
  const { stdout, stderr, exitCode } = execResult;

  if (isLoggingEnabled()) {
    log(
      `EXEC exit=${exitCode} time=${duration}ms:\n\tSTDOUT: ${stdout}\n\tSTDERR: ${stderr}`,
    );
  }

  return {
    stdout,
    stderr,
    exitCode,
    durationMs: duration,
  };
}
|
|
|
|
|
|
|
2025-04-17 08:15:39 -07:00
|
|
|
|
/**
 * Report whether `/proc/1/cgroup` is accessible, which this module uses as
 * its signal for "running on Linux".
 *
 * @returns A promise resolving to `true` when the file can be accessed and
 *   `false` when the access check fails for any reason; it never rejects.
 */
const isInLinux = (): Promise<boolean> =>
  access("/proc/1/cgroup").then(
    () => true,
    () => false,
  );
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Choose the sandbox implementation for a command.
 *
 * @param runInSandbox Whether the caller mandates sandboxed execution.
 * @returns `SandboxType.NONE` when no sandbox is requested, when
 *   `isInLinux()` reports Linux, or on Windows (after logging a warning);
 *   `SandboxType.MACOS_SEATBELT` on macOS.
 * @throws Error when a sandbox is mandated on any other platform.
 */
async function getSandbox(runInSandbox: boolean): Promise<SandboxType> {
  if (!runInSandbox) {
    return SandboxType.NONE;
  }

  if (process.platform === "darwin") {
    return SandboxType.MACOS_SEATBELT;
  }

  // NOTE(review): a mandated sandbox resolves to NONE on Linux — presumably
  // because isolation is expected to come from the environment; confirm.
  if (await isInLinux()) {
    return SandboxType.NONE;
  }

  if (process.platform === "win32") {
    // There is no sandbox implementation for Windows yet, so fall back to
    // NONE instead of throwing, which would crash the application.
    log(
      "WARNING: Sandbox was requested but is not available on Windows. Continuing without sandbox.",
    );
    return SandboxType.NONE;
  }

  // Any other platform: refuse to run rather than silently drop the sandbox.
  throw new Error("Sandbox was mandated, but no sandbox is available!");
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Ask the user to review a command and translate the decision.
 *
 * A non-null return value means the command was rejected by the user; `null`
 * means the caller may go ahead and execute it.
 *
 * Side effect: an `ALWAYS` decision stores the command's derived key in the
 * "always approve" cache so that later equivalent commands are not prompted.
 *
 * @param args Command under review; only `args.cmd` is read.
 * @param applyPatchCommand Parsed patch, forwarded to the confirmation
 *   callback.
 * @param getCommandConfirmation Callback that obtains the user's decision.
 * @returns `null` for `YES`, `ALWAYS` and `EXPLAIN`; otherwise an `"aborted"`
 *   result carrying a user message that explains the refusal.
 */
async function askUserPermission(
  args: ExecInput,
  applyPatchCommand: ApplyPatchCommand | undefined,
  getCommandConfirmation: (
    command: Array<string>,
    applyPatch: ApplyPatchCommand | undefined,
  ) => Promise<CommandConfirmation>,
): Promise<HandleExecCommandResult | null> {
  const confirmation = await getCommandConfirmation(
    args.cmd,
    applyPatchCommand,
  );
  const { review: decision, customDenyMessage } = confirmation;

  switch (decision) {
    case ReviewDecision.ALWAYS: {
      // Remember this command so we won't ask again during this session.
      alwaysApprovedCommands.add(deriveCommandKey(args.cmd));
      return null;
    }

    case ReviewDecision.YES: {
      return null;
    }

    case ReviewDecision.EXPLAIN: {
      // NOTE(review): EXPLAIN yields null, which callers treat exactly like
      // an approval, and no "explanation requested" flag is returned. This
      // presumably relies on `getCommandConfirmation` resolving EXPLAIN into
      // a final decision before it returns — confirm.
      return null;
    }

    default: {
      // Anything else is a refusal. Only NO_CONTINUE may carry a custom
      // message from the user; every other decision uses the "stop" note.
      let note: string;
      if (decision === ReviewDecision.NO_CONTINUE) {
        note =
          customDenyMessage?.trim() || "No, don't do that — keep going though.";
      } else {
        note = "No, don't do that — stop for now.";
      }

      return {
        outputText: "aborted",
        metadata: {},
        additionalItems: [
          {
            type: "message",
            role: "user",
            content: [{ type: "input_text", text: note }],
          },
        ],
      };
    }
  }
}
|