Files
llmx/codex-cli/src/utils/agent/handle-exec-command.ts

333 lines
10 KiB
TypeScript
Raw Normal View History

import type { CommandConfirmation } from "./agent-loop.js";
import type { AppConfig } from "../config.js";
import type { ExecInput } from "./sandbox/interface.js";
import type { ApplyPatchCommand, ApprovalPolicy } from "../../approvals.js";
import type { ResponseInputItem } from "openai/resources/responses/responses.mjs";
import { exec, execApplyPatch } from "./exec.js";
import { isLoggingEnabled, log } from "./log.js";
import { ReviewDecision } from "./review.js";
import { FullAutoErrorMode } from "../auto-approval-mode.js";
import { SandboxType } from "./sandbox/interface.js";
import { canAutoApprove } from "../../approvals.js";
import { formatCommandForDisplay } from "../../format-command.js";
import { access } from "fs/promises";
// ---------------------------------------------------------------------------
// Sessionlevel cache of commands that the user has chosen to always approve.
//
// The values are derived via `deriveCommandKey()` which intentionally ignores
// volatile arguments (for example the patch text passed to `apply_patch`).
// Storing *generalised* keys means that once a user selects "always approve"
// for a given class of command we will genuinely stop prompting them for
// subsequent, equivalent invocations during the same CLI session.
// ---------------------------------------------------------------------------
const alwaysApprovedCommands = new Set<string>();
// ---------------------------------------------------------------------------
// Helper: Given the argv-style representation of a command, return a stable
// string key that can be used for equality checks.
//
// The key space purposefully abstracts away parts of the command line that
// are expected to change between invocations while still retaining enough
// information to differentiate *meaningfully distinct* operations. See the
// extensive inline documentation for details.
// ---------------------------------------------------------------------------
function deriveCommandKey(cmd: Array<string>): string {
// pull off only the bits you care about
const [
maybeShell,
maybeFlag,
coreInvocation,
/* …ignore the rest… */
] = cmd;
if (coreInvocation?.startsWith("apply_patch")) {
return "apply_patch";
}
if (maybeShell === "bash" && maybeFlag === "-lc") {
// If the command was invoked through `bash -lc "<script>"` we extract the
// base program name from the script string.
const script = coreInvocation ?? "";
return script.split(/\s+/)[0] || "bash";
}
// For every other command we fall back to using only the program name (the
// first argv element). This guarantees we always return a *string* even if
// `coreInvocation` is undefined.
if (coreInvocation) {
return coreInvocation.split(/\s+/)[0]!;
}
return JSON.stringify(cmd);
}
type HandleExecCommandResult = {
outputText: string;
metadata: Record<string, unknown>;
additionalItems?: Array<ResponseInputItem>;
};
export async function handleExecCommand(
args: ExecInput,
config: AppConfig,
policy: ApprovalPolicy,
getCommandConfirmation: (
command: Array<string>,
applyPatch: ApplyPatchCommand | undefined,
) => Promise<CommandConfirmation>,
abortSignal?: AbortSignal,
): Promise<HandleExecCommandResult> {
const { cmd: command } = args;
const key = deriveCommandKey(command);
// 1) If the user has already said "always approve", skip
// any policy & never sandbox.
if (alwaysApprovedCommands.has(key)) {
return execCommand(
args,
/* applyPatch */ undefined,
/* runInSandbox */ false,
abortSignal,
).then(convertSummaryToResult);
}
// 2) Otherwise fall back to the normal policy
// `canAutoApprove` now requires the list of writable roots that the command
// is allowed to modify. For the CLI we conservatively pass the current
// working directory so that edits are constrained to the project root. If
// the caller wishes to broaden or restrict the set it can be made
// configurable in the future.
const safety = canAutoApprove(command, policy, [process.cwd()]);
let runInSandbox: boolean;
switch (safety.type) {
case "ask-user": {
const review = await askUserPermission(
args,
safety.applyPatch,
getCommandConfirmation,
);
if (review != null) {
return review;
}
runInSandbox = false;
break;
}
case "auto-approve": {
runInSandbox = safety.runInSandbox;
break;
}
case "reject": {
return {
outputText: "aborted",
metadata: {
error: "command rejected",
reason: "Command rejected by auto-approval system.",
},
};
}
}
const { applyPatch } = safety;
const summary = await execCommand(
args,
applyPatch,
runInSandbox,
abortSignal,
);
// If the operation was aborted in the meantime, propagate the cancellation
// upward by returning an empty (noop) result so that the agent loop will
// exit cleanly without emitting spurious output.
if (abortSignal?.aborted) {
return {
outputText: "",
metadata: {},
};
}
if (
summary.exitCode !== 0 &&
runInSandbox &&
// Default: If the user has configured to ignore and continue,
// skip re-running the command.
//
// Otherwise, if they selected "ask-user", then we should ask the user
// for permission to re-run the command outside of the sandbox.
config.fullAutoErrorMode &&
config.fullAutoErrorMode === FullAutoErrorMode.ASK_USER
) {
const review = await askUserPermission(
args,
safety.applyPatch,
getCommandConfirmation,
);
if (review != null) {
return review;
} else {
// The user has approved the command, so we will run it outside of the
// sandbox.
const summary = await execCommand(args, applyPatch, false, abortSignal);
return convertSummaryToResult(summary);
}
} else {
return convertSummaryToResult(summary);
}
}
function convertSummaryToResult(
summary: ExecCommandSummary,
): HandleExecCommandResult {
const { stdout, stderr, exitCode, durationMs } = summary;
return {
outputText: stdout || stderr,
metadata: {
exit_code: exitCode,
duration_seconds: Math.round(durationMs / 100) / 10,
},
};
}
type ExecCommandSummary = {
stdout: string;
stderr: string;
exitCode: number;
durationMs: number;
};
async function execCommand(
execInput: ExecInput,
applyPatchCommand: ApplyPatchCommand | undefined,
runInSandbox: boolean,
abortSignal?: AbortSignal,
): Promise<ExecCommandSummary> {
let { workdir } = execInput;
if (workdir) {
try {
await access(workdir);
} catch (e) {
log(`EXEC workdir=${workdir} not found, use process.cwd() instead`);
workdir = process.cwd();
}
}
if (isLoggingEnabled()) {
if (applyPatchCommand != null) {
log("EXEC running apply_patch command");
} else {
const { cmd, timeoutInMillis } = execInput;
// Seconds are a bit easier to read in log messages and most timeouts
// are specified as multiples of 1000, anyway.
const timeout =
timeoutInMillis != null
? Math.round(timeoutInMillis / 1000).toString()
: "undefined";
log(
`EXEC running \`${formatCommandForDisplay(
cmd,
)}\` in workdir=${workdir} with timeout=${timeout}s`,
);
}
}
// Note execApplyPatch() and exec() are coded defensively and should not
// throw. Any internal errors should be mapped to a non-zero value for the
// exitCode field.
const start = Date.now();
const execResult =
applyPatchCommand != null
? execApplyPatch(applyPatchCommand.patch)
: await exec(execInput, await getSandbox(runInSandbox), abortSignal);
const duration = Date.now() - start;
const { stdout, stderr, exitCode } = execResult;
if (isLoggingEnabled()) {
log(
`EXEC exit=${exitCode} time=${duration}ms:\n\tSTDOUT: ${stdout}\n\tSTDERR: ${stderr}`,
);
}
return {
stdout,
stderr,
exitCode,
durationMs: duration,
};
}
const isInLinux = async (): Promise<boolean> => {
try {
await access("/proc/1/cgroup");
return true;
} catch {
return false;
}
};
async function getSandbox(runInSandbox: boolean): Promise<SandboxType> {
if (runInSandbox) {
if (process.platform === "darwin") {
return SandboxType.MACOS_SEATBELT;
} else if (await isInLinux()) {
return SandboxType.NONE;
} else if (process.platform === "win32") {
// On Windows, we don't have a sandbox implementation yet, so we fall back to NONE
// instead of throwing an error, which would crash the application
log(
"WARNING: Sandbox was requested but is not available on Windows. Continuing without sandbox.",
);
return SandboxType.NONE;
}
// For other platforms, still throw an error as before
throw new Error("Sandbox was mandated, but no sandbox is available!");
} else {
return SandboxType.NONE;
}
}
/**
* If return value is non-null, then the command was rejected by the user.
*/
async function askUserPermission(
args: ExecInput,
applyPatchCommand: ApplyPatchCommand | undefined,
getCommandConfirmation: (
command: Array<string>,
applyPatch: ApplyPatchCommand | undefined,
) => Promise<CommandConfirmation>,
): Promise<HandleExecCommandResult | null> {
const { review: decision, customDenyMessage } = await getCommandConfirmation(
args.cmd,
applyPatchCommand,
);
if (decision === ReviewDecision.ALWAYS) {
// Persist this command so we won't ask again during this session.
const key = deriveCommandKey(args.cmd);
alwaysApprovedCommands.add(key);
}
// Any decision other than an affirmative (YES / ALWAYS) aborts execution.
if (decision !== ReviewDecision.YES && decision !== ReviewDecision.ALWAYS) {
const note =
decision === ReviewDecision.NO_CONTINUE
? customDenyMessage?.trim() || "No, don't do that — keep going though."
: "No, don't do that — stop for now.";
return {
outputText: "aborted",
metadata: {},
additionalItems: [
{
type: "message",
role: "user",
content: [{ type: "input_text", text: note }],
},
],
};
} else {
return null;
}
}