Files
llmx/codex-cli/src/utils/agent/handle-exec-command.ts
Thibault Sottiaux dc276999a9 chore: improve storage/ implementation; use log(...) consistently (#473)
This PR tidies up primitives under storage/.

**Noop changes:**

* Promote logger implementation to top-level utility outside of agent/
* Use logger within storage primitives
* Cleanup doc strings and comments

**Functional changes:**

* Increase command history size to 10_000
* Remove unnecessary debounce implementation and ensure a session ID is
created only once per agent loop

---------

Signed-off-by: Thibault Sottiaux <tibo@openai.com>
2025-04-21 09:51:34 -04:00

352 lines
11 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import type { CommandConfirmation } from "./agent-loop.js";
import type { AppConfig } from "../config.js";
import type { ExecInput } from "./sandbox/interface.js";
import type { ApplyPatchCommand, ApprovalPolicy } from "../../approvals.js";
import type { ResponseInputItem } from "openai/resources/responses/responses.mjs";
import { exec, execApplyPatch } from "./exec.js";
import { ReviewDecision } from "./review.js";
import { FullAutoErrorMode } from "../auto-approval-mode.js";
import { SandboxType } from "./sandbox/interface.js";
import { canAutoApprove } from "../../approvals.js";
import { formatCommandForDisplay } from "../../format-command.js";
import { isLoggingEnabled, log } from "../logger/log.js";
import { access } from "fs/promises";
// ---------------------------------------------------------------------------
// Sessionlevel cache of commands that the user has chosen to always approve.
//
// The values are derived via `deriveCommandKey()` which intentionally ignores
// volatile arguments (for example the patch text passed to `apply_patch`).
// Storing *generalised* keys means that once a user selects "always approve"
// for a given class of command we will genuinely stop prompting them for
// subsequent, equivalent invocations during the same CLI session.
// ---------------------------------------------------------------------------
const alwaysApprovedCommands = new Set<string>();
// ---------------------------------------------------------------------------
// Helper: Given the argv-style representation of a command, return a stable
// string key that can be used for equality checks.
//
// The key space purposefully abstracts away parts of the command line that
// are expected to change between invocations while still retaining enough
// information to differentiate *meaningfully distinct* operations. See the
// extensive inline documentation for details.
// ---------------------------------------------------------------------------
function deriveCommandKey(cmd: Array<string>): string {
// pull off only the bits you care about
const [
maybeShell,
maybeFlag,
coreInvocation,
/* …ignore the rest… */
] = cmd;
if (coreInvocation?.startsWith("apply_patch")) {
return "apply_patch";
}
if (maybeShell === "bash" && maybeFlag === "-lc") {
// If the command was invoked through `bash -lc "<script>"` we extract the
// base program name from the script string.
const script = coreInvocation ?? "";
return script.split(/\s+/)[0] || "bash";
}
// For every other command we fall back to using only the program name (the
// first argv element). This guarantees we always return a *string* even if
// `coreInvocation` is undefined.
if (coreInvocation) {
return coreInvocation.split(/\s+/)[0]!;
}
return JSON.stringify(cmd);
}
type HandleExecCommandResult = {
outputText: string;
metadata: Record<string, unknown>;
additionalItems?: Array<ResponseInputItem>;
};
export async function handleExecCommand(
args: ExecInput,
config: AppConfig,
policy: ApprovalPolicy,
additionalWritableRoots: ReadonlyArray<string>,
getCommandConfirmation: (
command: Array<string>,
applyPatch: ApplyPatchCommand | undefined,
) => Promise<CommandConfirmation>,
abortSignal?: AbortSignal,
): Promise<HandleExecCommandResult> {
const { cmd: command } = args;
const key = deriveCommandKey(command);
// 1) If the user has already said "always approve", skip
// any policy & never sandbox.
if (alwaysApprovedCommands.has(key)) {
return execCommand(
args,
/* applyPatch */ undefined,
/* runInSandbox */ false,
additionalWritableRoots,
abortSignal,
).then(convertSummaryToResult);
}
// 2) Otherwise fall back to the normal policy
// `canAutoApprove` now requires the list of writable roots that the command
// is allowed to modify. For the CLI we conservatively pass the current
// working directory so that edits are constrained to the project root. If
// the caller wishes to broaden or restrict the set it can be made
// configurable in the future.
const safety = canAutoApprove(command, policy, [process.cwd()]);
let runInSandbox: boolean;
switch (safety.type) {
case "ask-user": {
const review = await askUserPermission(
args,
safety.applyPatch,
getCommandConfirmation,
);
if (review != null) {
return review;
}
runInSandbox = false;
break;
}
case "auto-approve": {
runInSandbox = safety.runInSandbox;
break;
}
case "reject": {
return {
outputText: "aborted",
metadata: {
error: "command rejected",
reason: "Command rejected by auto-approval system.",
},
};
}
}
const { applyPatch } = safety;
const summary = await execCommand(
args,
applyPatch,
runInSandbox,
additionalWritableRoots,
abortSignal,
);
// If the operation was aborted in the meantime, propagate the cancellation
// upward by returning an empty (noop) result so that the agent loop will
// exit cleanly without emitting spurious output.
if (abortSignal?.aborted) {
return {
outputText: "",
metadata: {},
};
}
if (
summary.exitCode !== 0 &&
runInSandbox &&
// Default: If the user has configured to ignore and continue,
// skip re-running the command.
//
// Otherwise, if they selected "ask-user", then we should ask the user
// for permission to re-run the command outside of the sandbox.
config.fullAutoErrorMode &&
config.fullAutoErrorMode === FullAutoErrorMode.ASK_USER
) {
const review = await askUserPermission(
args,
safety.applyPatch,
getCommandConfirmation,
);
if (review != null) {
return review;
} else {
// The user has approved the command, so we will run it outside of the
// sandbox.
const summary = await execCommand(
args,
applyPatch,
false,
additionalWritableRoots,
abortSignal,
);
return convertSummaryToResult(summary);
}
} else {
return convertSummaryToResult(summary);
}
}
function convertSummaryToResult(
summary: ExecCommandSummary,
): HandleExecCommandResult {
const { stdout, stderr, exitCode, durationMs } = summary;
return {
outputText: stdout || stderr,
metadata: {
exit_code: exitCode,
duration_seconds: Math.round(durationMs / 100) / 10,
},
};
}
type ExecCommandSummary = {
stdout: string;
stderr: string;
exitCode: number;
durationMs: number;
};
async function execCommand(
execInput: ExecInput,
applyPatchCommand: ApplyPatchCommand | undefined,
runInSandbox: boolean,
additionalWritableRoots: ReadonlyArray<string>,
abortSignal?: AbortSignal,
): Promise<ExecCommandSummary> {
let { workdir } = execInput;
if (workdir) {
try {
await access(workdir);
} catch (e) {
log(`EXEC workdir=${workdir} not found, use process.cwd() instead`);
workdir = process.cwd();
}
}
if (applyPatchCommand != null) {
log("EXEC running apply_patch command");
} else if (isLoggingEnabled()) {
const { cmd, timeoutInMillis } = execInput;
// Seconds are a bit easier to read in log messages and most timeouts
// are specified as multiples of 1000, anyway.
const timeout =
timeoutInMillis != null
? Math.round(timeoutInMillis / 1000).toString()
: "undefined";
log(
`EXEC running \`${formatCommandForDisplay(
cmd,
)}\` in workdir=${workdir} with timeout=${timeout}s`,
);
}
// Note execApplyPatch() and exec() are coded defensively and should not
// throw. Any internal errors should be mapped to a non-zero value for the
// exitCode field.
const start = Date.now();
const execResult =
applyPatchCommand != null
? execApplyPatch(applyPatchCommand.patch)
: await exec(
{ ...execInput, additionalWritableRoots },
await getSandbox(runInSandbox),
abortSignal,
);
const duration = Date.now() - start;
const { stdout, stderr, exitCode } = execResult;
if (isLoggingEnabled()) {
log(
`EXEC exit=${exitCode} time=${duration}ms:\n\tSTDOUT: ${stdout}\n\tSTDERR: ${stderr}`,
);
}
return {
stdout,
stderr,
exitCode,
durationMs: duration,
};
}
const isInLinux = async (): Promise<boolean> => {
try {
await access("/proc/1/cgroup");
return true;
} catch {
return false;
}
};
async function getSandbox(runInSandbox: boolean): Promise<SandboxType> {
if (runInSandbox) {
if (process.platform === "darwin") {
return SandboxType.MACOS_SEATBELT;
} else if (await isInLinux()) {
return SandboxType.NONE;
} else if (process.platform === "win32") {
// On Windows, we don't have a sandbox implementation yet, so we fall back to NONE
// instead of throwing an error, which would crash the application
log(
"WARNING: Sandbox was requested but is not available on Windows. Continuing without sandbox.",
);
return SandboxType.NONE;
}
// For other platforms, still throw an error as before
throw new Error("Sandbox was mandated, but no sandbox is available!");
} else {
return SandboxType.NONE;
}
}
/**
* If return value is non-null, then the command was rejected by the user.
*/
async function askUserPermission(
args: ExecInput,
applyPatchCommand: ApplyPatchCommand | undefined,
getCommandConfirmation: (
command: Array<string>,
applyPatch: ApplyPatchCommand | undefined,
) => Promise<CommandConfirmation>,
): Promise<HandleExecCommandResult | null> {
const { review: decision, customDenyMessage } = await getCommandConfirmation(
args.cmd,
applyPatchCommand,
);
if (decision === ReviewDecision.ALWAYS) {
// Persist this command so we won't ask again during this session.
const key = deriveCommandKey(args.cmd);
alwaysApprovedCommands.add(key);
}
// Handle EXPLAIN decision by returning null to continue with the normal flow
// but with a flag to indicate that an explanation was requested
if (decision === ReviewDecision.EXPLAIN) {
return null;
}
// Any decision other than an affirmative (YES / ALWAYS) or EXPLAIN aborts execution.
if (decision !== ReviewDecision.YES && decision !== ReviewDecision.ALWAYS) {
const note =
decision === ReviewDecision.NO_CONTINUE
? customDenyMessage?.trim() || "No, don't do that — keep going though."
: "No, don't do that — stop for now.";
return {
outputText: "aborted",
metadata: {},
additionalItems: [
{
type: "message",
role: "user",
content: [{ type: "input_text", text: note }],
},
],
};
} else {
return null;
}
}