Removes computeAutoApproval() and tightens up canAutoApprove() as the source of truth (#126)

Previously, `parseToolCall()` was using `computeAutoApproval()`, which was a somewhat parallel implementation of `canAutoApprove()` in order to get `SafeCommandReason` metadata for presenting information to the user. The only function that was using `SafeCommandReason` was `useMessageGrouping()`, but it turns out that function was unused, so this PR removes `computeAutoApproval()` and all code related to it. More importantly, I believe this fixes https://github.com/openai/codex/issues/87 because `computeAutoApproval()` was calling `parse()` from `shell-quote` without wrapping it in a try-catch. This PR updates `canAutoApprove()` to use a tighter try-catch block that is specific to `parse()` and returns an appropriate `SafetyAssessment` in the event of an error, based on the `ApprovalPolicy`. Signed-off-by: Michael Bolin <mbolin@openai.com>
2025-04-16 15:39:41 -07:00
parent 72d0230b33
commit fb6f798671
4 changed files with 80 additions and 239 deletions
--- a/codex-cli/src/approvals.ts
+++ b/codex-cli/src/approvals.ts
@@ -75,19 +75,72 @@ export function canAutoApprove(
  writableRoots: ReadonlyArray<string>,
  env: NodeJS.ProcessEnv = process.env,
 ): SafetyAssessment {
-  try {
-    if (command[0] === "apply_patch") {
-      return command.length === 2 && typeof command[1] === "string"
-        ? canAutoApproveApplyPatch(command[1], writableRoots, policy)
-        : {
-            type: "reject",
-            reason: "Invalid apply_patch command",
-          };
+  if (command[0] === "apply_patch") {
+    return command.length === 2 && typeof command[1] === "string"
+      ? canAutoApproveApplyPatch(command[1], writableRoots, policy)
+      : {
+          type: "reject",
+          reason: "Invalid apply_patch command",
+        };
+  }
+
+  const isSafe = isSafeCommand(command);
+  if (isSafe != null) {
+    const { reason, group } = isSafe;
+    return {
+      type: "auto-approve",
+      reason,
+      group,
+      runInSandbox: false,
+    };
+  }
+
+  if (
+    command[0] === "bash" &&
+    command[1] === "-lc" &&
+    typeof command[2] === "string" &&
+    command.length === 3
+  ) {
+    const applyPatchArg = tryParseApplyPatch(command[2]);
+    if (applyPatchArg != null) {
+      return canAutoApproveApplyPatch(applyPatchArg, writableRoots, policy);
    }

-    const isSafe = isSafeCommand(command);
-    if (isSafe != null) {
-      const { reason, group } = isSafe;
+    let bashCmd;
+    try {
+      bashCmd = parse(command[2], env);
+    } catch (e) {
+      // In practice, there seem to be syntactically valid shell commands that
+      // shell-quote cannot parse, so we should not reject, but ask the user.
+      switch (policy) {
+        case "full-auto":
+          // In full-auto, we still run the command automatically, but must
+          // restrict it to the sandbox.
+          return {
+            type: "auto-approve",
+            reason: "Full auto mode",
+            group: "Running commands",
+            runInSandbox: true,
+          };
+        case "suggest":
+        case "auto-edit":
+          // In all other modes, since we cannot reason about the command, we
+          // should ask the user.
+          return {
+            type: "ask-user",
+          };
+      }
+    }
+
+    // bashCmd could be a mix of strings and operators, e.g.:
+    //   "ls || (true && pwd)" => [ 'ls', { op: '||' }, '(', 'true', { op: '&&' }, 'pwd', ')' ]
+    // We try to ensure that *every* command segment is deemed safe and that
+    // all operators belong to an allow‑list. If so, the entire expression is
+    // considered auto‑approvable.
+
+    const shellSafe = isEntireShellExpressionSafe(bashCmd);
+    if (shellSafe != null) {
+      const { reason, group } = shellSafe;
      return {
        type: "auto-approve",
        reason,
@@ -95,58 +148,16 @@ export function canAutoApprove(
        runInSandbox: false,
      };
    }
+  }

-    if (
-      command[0] === "bash" &&
-      command[1] === "-lc" &&
-      typeof command[2] === "string" &&
-      command.length === 3
-    ) {
-      const applyPatchArg = tryParseApplyPatch(command[2]);
-      if (applyPatchArg != null) {
-        return canAutoApproveApplyPatch(applyPatchArg, writableRoots, policy);
-      }
-
-      const bashCmd = parse(command[2], env);
-
-      // bashCmd could be a mix of strings and operators, e.g.:
-      //   "ls || (true && pwd)" => [ 'ls', { op: '||' }, '(', 'true', { op: '&&' }, 'pwd', ')' ]
-      // We try to ensure that *every* command segment is deemed safe and that
-      // all operators belong to an allow‑list. If so, the entire expression is
-      // considered auto‑approvable.
-
-      const shellSafe = isEntireShellExpressionSafe(bashCmd);
-      if (shellSafe != null) {
-        const { reason, group } = shellSafe;
-        return {
-          type: "auto-approve",
-          reason,
-          group,
-          runInSandbox: false,
-        };
-      }
-    }
-
-    return policy === "full-auto"
-      ? {
-          type: "auto-approve",
-          reason: "Full auto mode",
-          group: "Running commands",
-          runInSandbox: true,
-        }
-      : { type: "ask-user" };
-  } catch (err) {
-    if (policy === "full-auto") {
-      return {
+  return policy === "full-auto"
+    ? {
        type: "auto-approve",
        reason: "Full auto mode",
        group: "Running commands",
        runInSandbox: true,
-      };
-    } else {
-      return { type: "ask-user" };
-    }
-  }
+      }
+    : { type: "ask-user" };
 }

 function canAutoApproveApplyPatch(
--- a/codex-cli/src/components/chat/use-message-grouping.ts
+++ b/codex-cli/src/components/chat/use-message-grouping.ts
@@ -1,8 +1,5 @@
 import type { ResponseItem } from "openai/resources/responses/responses.mjs";

-import { parseToolCall } from "../../utils/parsers.js";
-import { useMemo } from "react";
-
 /**
 * Represents a grouped sequence of response items (e.g., function call batches).
 */
@@ -10,72 +7,3 @@ export type GroupedResponseItem = {
  label: string;
  items: Array<ResponseItem>;
 };
-
-/**
- * Custom hook to group recent response items for display batching.
- * Returns counts of auto-approved tool call groups, the latest batch,
- * and the count of user messages in the visible window.
- */
-export function useMessageGrouping(visibleItems: Array<ResponseItem>): {
-  groupCounts: Record<string, number>;
-  batch: Array<{ item?: ResponseItem; group?: GroupedResponseItem }>;
-  userMsgCount: number;
-} {
-  return useMemo(() => {
-    // The grouping logic only depends on the subset of messages that are
-    // currently rendered (visibleItems).  Using that as the sole dependency
-    // keeps recomputations to a minimum and avoids unnecessary work when the
-    // full list of `items` changes outside of the visible window.
-    let userMsgCount = 0;
-    const groupCounts: Record<string, number> = {};
-    visibleItems.forEach((m) => {
-      if (m.type === "function_call") {
-        const toolCall = parseToolCall(m);
-        if (toolCall?.autoApproval) {
-          const group = toolCall.autoApproval.group;
-          groupCounts[group] = (groupCounts[group] || 0) + 1;
-        }
-      }
-      if (m.type === "message" && m.role === "user") {
-        userMsgCount++;
-      }
-    });
-    const lastFew = visibleItems.slice(-3);
-    const batch: Array<{ item?: ResponseItem; group?: GroupedResponseItem }> =
-      [];
-    if (lastFew[0]?.type === "function_call") {
-      const toolCall = parseToolCall(lastFew[0]);
-      batch.push({
-        group: {
-          label: toolCall?.autoApproval?.group || "Running command",
-          items: lastFew,
-        },
-      });
-      if (lastFew[2]?.type === "message") {
-        batch.push({ item: lastFew[2] });
-      }
-    } else if (lastFew[1]?.type === "function_call") {
-      const toolCall = parseToolCall(lastFew[1]);
-      batch.push({
-        group: {
-          label: toolCall?.autoApproval?.group || "Running command",
-          items: lastFew.slice(1),
-        },
-      });
-    } else if (lastFew[2]?.type === "function_call") {
-      const toolCall = parseToolCall(lastFew[2]);
-      batch.push({
-        group: {
-          label: toolCall?.autoApproval?.group || "Running command",
-          items: [lastFew[2]],
-        },
-      });
-    } else {
-      lastFew.forEach((item) => batch.push({ item }));
-    }
-    return { groupCounts, batch, userMsgCount };
-    // `items` is stable across renders while `visibleItems` changes based on
-    // the scroll window. Including only `visibleItems` avoids unnecessary
-    // recomputations while still producing correct results.
-  }, [visibleItems]);
-}
--- a/codex-cli/src/utils/agent/review.ts
+++ b/codex-cli/src/utils/agent/review.ts
@@ -1,11 +1,3 @@
-import type { SafeCommandReason } from "../../approvals";
-
-export type CommandReviewDetails = {
-  cmd: Array<string>;
-  cmdReadableText: string;
-  autoApproval: SafeCommandReason | null;
-};
-
 export enum ReviewDecision {
  YES = "yes",
  NO_CONTINUE = "no-continue",
--- a/codex-cli/src/utils/parsers.ts
+++ b/codex-cli/src/utils/parsers.ts
@@ -1,30 +1,16 @@
-import type { CommandReviewDetails } from "./agent/review.js";
 import type {
  ExecInput,
  ExecOutputMetadata,
 } from "./agent/sandbox/interface.js";
 import type { ResponseFunctionToolCall } from "openai/resources/responses/responses.mjs";

-import { isSafeCommand, type SafeCommandReason } from "../approvals.js";
 import { log } from "node:console";
-import process from "process";
-import { parse } from "shell-quote";
 import { formatCommandForDisplay } from "src/format-command.js";

 // The console utility import is intentionally explicit to avoid bundlers from
 // including the entire `console` module when only the `log` function is
 // required.

-// Allowed shell operators that we consider "safe" as they do not introduce
-// side‑effects on their own (unlike redirections). Parentheses and braces for
-// grouping are excluded for simplicity.
-const SAFE_SHELL_OPERATORS: ReadonlySet<string> = new Set([
-  "&&",
-  "||",
-  "|",
-  ";",
-]);
-
 export function parseToolCallOutput(toolCallOutput: string): {
  output: string;
  metadata: ExecOutputMetadata;
@@ -46,6 +32,17 @@ export function parseToolCallOutput(toolCallOutput: string): {
  }
 }

+export type CommandReviewDetails = {
+  cmd: Array<string>;
+  cmdReadableText: string;
+};
+
+/**
+ * Tries to parse a tool call and, if successful, returns an object that has
+ * both:
+ * - an array of strings to use with `ExecInput` and `canAutoApprove()`
+ * - a human-readable string to display to the user
+ */
 export function parseToolCall(
  toolCall: ResponseFunctionToolCall,
 ): CommandReviewDetails | undefined {
@@ -57,12 +54,9 @@ export function parseToolCall(
  const { cmd } = toolCallArgs;
  const cmdReadableText = formatCommandForDisplay(cmd);

-  const autoApproval = computeAutoApproval(cmd);
-
  return {
    cmd,
    cmdReadableText,
-    autoApproval,
  };
 }

@@ -109,87 +103,3 @@ function toStringArray(obj: unknown): Array<string> | undefined {
    return undefined;
  }
 }
-
-// ---------------- safe‑command helpers ----------------
-
-/**
- * Attempts to determine whether `cmd` is composed exclusively of safe
- * sub‑commands combined using only operators from the SAFE_SHELL_OPERATORS
- * allow‑list. Returns the `SafeCommandReason` (taken from the first sub‑command)
- * if the whole expression is safe; otherwise returns `null`.
- */
-function computeAutoApproval(cmd: Array<string>): SafeCommandReason | null {
-  // Fast path: a simple command with no shell processing.
-  const direct = isSafeCommand(cmd);
-  if (direct != null) {
-    return direct;
-  }
-
-  // For expressions like ["bash", "-lc", "ls && pwd"] break down the inner
-  // string and verify each segment.
-  if (
-    cmd.length === 3 &&
-    cmd[0] === "bash" &&
-    cmd[1] === "-lc" &&
-    typeof cmd[2] === "string"
-  ) {
-    const parsed = parse(cmd[2], process.env ?? {});
-    if (parsed.length === 0) {
-      return null;
-    }
-
-    let current: Array<string> = [];
-    let first: SafeCommandReason | null = null;
-
-    const flush = (): boolean => {
-      if (current.length === 0) {
-        return true;
-      }
-      const safe = isSafeCommand(current);
-      if (safe == null) {
-        return false;
-      }
-      if (!first) {
-        first = safe;
-      }
-      current = [];
-      return true;
-    };
-
-    for (const part of parsed) {
-      if (typeof part === "string") {
-        // Simple word/argument token.
-        if (part === "(" || part === ")" || part === "{" || part === "}") {
-          // We treat explicit grouping tokens as unsafe because their
-          // semantics depend on the shell evaluation environment.
-          return null;
-        }
-        current.push(part);
-      } else if (part && typeof part === "object") {
-        const opToken = part as { op?: string };
-        if (typeof opToken.op === "string") {
-          if (!flush()) {
-            return null;
-          }
-          if (!SAFE_SHELL_OPERATORS.has(opToken.op)) {
-            return null;
-          }
-        } else {
-          // Unknown object token kind (e.g. redirection) – treat as unsafe.
-          return null;
-        }
-      } else {
-        // Token types such as numbers / booleans are unexpected – treat as unsafe.
-        return null;
-      }
-    }
-
-    if (!flush()) {
-      return null;
-    }
-
-    return first;
-  }
-
-  return null;
-}