Initial commit

Signed-off-by: Ilan Bigio <ilan@openai.com>
2025-04-16 12:56:08 -04:00
commit 59a180ddec
163 changed files with 30587 additions and 0 deletions
--- a/codex-cli/src/utils/agent/handle-exec-command.ts
+++ b/codex-cli/src/utils/agent/handle-exec-command.ts
@@ -0,0 +1,315 @@
+import type { CommandConfirmation } from "./agent-loop.js";
+import type { AppConfig } from "../config.js";
+import type { ExecInput } from "./sandbox/interface.js";
+import type { ApplyPatchCommand, ApprovalPolicy } from "@lib/approvals.js";
+import type { ResponseInputItem } from "openai/resources/responses/responses.mjs";
+
+import { exec, execApplyPatch } from "./exec.js";
+import { isLoggingEnabled, log } from "./log.js";
+import { ReviewDecision } from "./review.js";
+import { FullAutoErrorMode } from "../auto-approval-mode.js";
+import { SandboxType } from "./sandbox/interface.js";
+import { canAutoApprove } from "@lib/approvals.js";
+import { formatCommandForDisplay } from "@lib/format-command.js";
+import { access } from "fs/promises";
+
+// ---------------------------------------------------------------------------
+// Session‑level cache of commands that the user has chosen to always approve.
+//
+// The values are derived via `deriveCommandKey()` which intentionally ignores
+// volatile arguments (for example the patch text passed to `apply_patch`).
+// Storing *generalised* keys means that once a user selects "always approve"
+// for a given class of command we will genuinely stop prompting them for
+// subsequent, equivalent invocations during the same CLI session.
+// ---------------------------------------------------------------------------
+const alwaysApprovedCommands = new Set<string>();
+
+// ---------------------------------------------------------------------------
+// Helper: Given the argv-style representation of a command, return a stable
+// string key that can be used for equality checks.
+//
+// The key space purposefully abstracts away parts of the command line that
+// are expected to change between invocations while still retaining enough
+// information to differentiate *meaningfully distinct* operations.  See the
+// extensive inline documentation for details.
+// ---------------------------------------------------------------------------
+
+function deriveCommandKey(cmd: Array<string>): string {
+  // pull off only the bits you care about
+  const [
+    maybeShell,
+    maybeFlag,
+    coreInvocation,
+    /* …ignore the rest… */
+  ] = cmd;
+
+  if (coreInvocation?.startsWith("apply_patch")) {
+    return "apply_patch";
+  }
+
+  if (maybeShell === "bash" && maybeFlag === "-lc") {
+    // If the command was invoked through `bash -lc "<script>"` we extract the
+    // base program name from the script string.
+    const script = coreInvocation ?? "";
+    return script.split(/\s+/)[0] || "bash";
+  }
+
+  // For every other command we fall back to using only the program name (the
+  // first argv element).  This guarantees we always return a *string* even if
+  // `coreInvocation` is undefined.
+  if (coreInvocation) {
+    return coreInvocation.split(/\s+/)[0]!;
+  }
+
+  return JSON.stringify(cmd);
+}
+
+type HandleExecCommandResult = {
+  outputText: string;
+  metadata: Record<string, unknown>;
+  additionalItems?: Array<ResponseInputItem>;
+};
+
+export async function handleExecCommand(
+  args: ExecInput,
+  config: AppConfig,
+  policy: ApprovalPolicy,
+  getCommandConfirmation: (
+    command: Array<string>,
+    applyPatch: ApplyPatchCommand | undefined,
+  ) => Promise<CommandConfirmation>,
+  abortSignal?: AbortSignal,
+): Promise<HandleExecCommandResult> {
+  const { cmd: command } = args;
+
+  const key = deriveCommandKey(command);
+
+  // 1) If the user has already said "always approve", skip
+  //    any policy & never sandbox.
+  if (alwaysApprovedCommands.has(key)) {
+    return execCommand(
+      args,
+      /* applyPatch */ undefined,
+      /* runInSandbox */ false,
+      abortSignal,
+    ).then(convertSummaryToResult);
+  }
+
+  // 2) Otherwise fall back to the normal policy
+  // `canAutoApprove` now requires the list of writable roots that the command
+  // is allowed to modify.  For the CLI we conservatively pass the current
+  // working directory so that edits are constrained to the project root.  If
+  // the caller wishes to broaden or restrict the set it can be made
+  // configurable in the future.
+  const safety = canAutoApprove(command, policy, [process.cwd()]);
+
+  let runInSandbox: boolean;
+  switch (safety.type) {
+    case "ask-user": {
+      const review = await askUserPermission(
+        args,
+        safety.applyPatch,
+        getCommandConfirmation,
+      );
+      if (review != null) {
+        return review;
+      }
+
+      runInSandbox = false;
+      break;
+    }
+    case "auto-approve": {
+      runInSandbox = safety.runInSandbox;
+      break;
+    }
+    case "reject": {
+      return {
+        outputText: "aborted",
+        metadata: {
+          error: "command rejected",
+          reason: "Command rejected by auto-approval system.",
+        },
+      };
+    }
+  }
+
+  const { applyPatch } = safety;
+  const summary = await execCommand(
+    args,
+    applyPatch,
+    runInSandbox,
+    abortSignal,
+  );
+  // If the operation was aborted in the meantime, propagate the cancellation
+  // upward by returning an empty (no‑op) result so that the agent loop will
+  // exit cleanly without emitting spurious output.
+  if (abortSignal?.aborted) {
+    return {
+      outputText: "",
+      metadata: {},
+    };
+  }
+  if (
+    summary.exitCode !== 0 &&
+    runInSandbox &&
+    // Default: If the user has configured to ignore and continue,
+    // skip re-running the command.
+    //
+    // Otherwise, if they selected "ask-user", then we should ask the user
+    // for permission to re-run the command outside of the sandbox.
+    config.fullAutoErrorMode &&
+    config.fullAutoErrorMode === FullAutoErrorMode.ASK_USER
+  ) {
+    const review = await askUserPermission(
+      args,
+      safety.applyPatch,
+      getCommandConfirmation,
+    );
+    if (review != null) {
+      return review;
+    } else {
+      // The user has approved the command, so we will run it outside of the
+      // sandbox.
+      const summary = await execCommand(args, applyPatch, false, abortSignal);
+      return convertSummaryToResult(summary);
+    }
+  } else {
+    return convertSummaryToResult(summary);
+  }
+}
+
+function convertSummaryToResult(
+  summary: ExecCommandSummary,
+): HandleExecCommandResult {
+  const { stdout, stderr, exitCode, durationMs } = summary;
+  return {
+    outputText: stdout || stderr,
+    metadata: {
+      exit_code: exitCode,
+      duration_seconds: Math.round(durationMs / 100) / 10,
+    },
+  };
+}
+
+type ExecCommandSummary = {
+  stdout: string;
+  stderr: string;
+  exitCode: number;
+  durationMs: number;
+};
+
+async function execCommand(
+  execInput: ExecInput,
+  applyPatchCommand: ApplyPatchCommand | undefined,
+  runInSandbox: boolean,
+  abortSignal?: AbortSignal,
+): Promise<ExecCommandSummary> {
+  if (isLoggingEnabled()) {
+    if (applyPatchCommand != null) {
+      log("EXEC running apply_patch command");
+    } else {
+      const { cmd, workdir, timeoutInMillis } = execInput;
+      // Seconds are a bit easier to read in log messages and most timeouts
+      // are specified as multiples of 1000, anyway.
+      const timeout =
+        timeoutInMillis != null
+          ? Math.round(timeoutInMillis / 1000).toString()
+          : "undefined";
+      log(
+        `EXEC running \`${formatCommandForDisplay(
+          cmd,
+        )}\` in workdir=${workdir} with timeout=${timeout}s`,
+      );
+    }
+  }
+
+  // Note execApplyPatch() and exec() are coded defensively and should not
+  // throw. Any internal errors should be mapped to a non-zero value for the
+  // exitCode field.
+  const start = Date.now();
+  const execResult =
+    applyPatchCommand != null
+      ? execApplyPatch(applyPatchCommand.patch)
+      : await exec(execInput, await getSandbox(runInSandbox), abortSignal);
+  const duration = Date.now() - start;
+  const { stdout, stderr, exitCode } = execResult;
+
+  if (isLoggingEnabled()) {
+    log(
+      `EXEC exit=${exitCode} time=${duration}ms:\n\tSTDOUT: ${stdout}\n\tSTDERR: ${stderr}`,
+    );
+  }
+
+  return {
+    stdout,
+    stderr,
+    exitCode,
+    durationMs: duration,
+  };
+}
+
+const isInContainer = async (): Promise<boolean> => {
+  try {
+    await access("/proc/1/cgroup");
+    return true;
+  } catch {
+    return false;
+  }
+};
+
+async function getSandbox(runInSandbox: boolean): Promise<SandboxType> {
+  if (runInSandbox) {
+    if (process.platform === "darwin") {
+      return SandboxType.MACOS_SEATBELT;
+    } else if (await isInContainer()) {
+      return SandboxType.NONE;
+    }
+    throw new Error("Sandbox was mandated, but no sandbox is available!");
+  } else {
+    return SandboxType.NONE;
+  }
+}
+
+/**
+ * If return value is non-null, then the command was rejected by the user.
+ */
+async function askUserPermission(
+  args: ExecInput,
+  applyPatchCommand: ApplyPatchCommand | undefined,
+  getCommandConfirmation: (
+    command: Array<string>,
+    applyPatch: ApplyPatchCommand | undefined,
+  ) => Promise<CommandConfirmation>,
+): Promise<HandleExecCommandResult | null> {
+  const { review: decision, customDenyMessage } = await getCommandConfirmation(
+    args.cmd,
+    applyPatchCommand,
+  );
+
+  if (decision === ReviewDecision.ALWAYS) {
+    // Persist this command so we won't ask again during this session.
+    const key = deriveCommandKey(args.cmd);
+    alwaysApprovedCommands.add(key);
+  }
+
+  // Any decision other than an affirmative (YES / ALWAYS) aborts execution.
+  if (decision !== ReviewDecision.YES && decision !== ReviewDecision.ALWAYS) {
+    const note =
+      decision === ReviewDecision.NO_CONTINUE
+        ? customDenyMessage?.trim() || "No, don't do that — keep going though."
+        : "No, don't do that — stop for now.";
+    return {
+      outputText: "aborted",
+      metadata: {},
+      additionalItems: [
+        {
+          type: "message",
+          role: "user",
+          content: [{ type: "input_text", text: note }],
+        },
+      ],
+    };
+  } else {
+    return null;
+  }
+}