Removes computeAutoApproval() and tightens up canAutoApprove() as the source of truth (#126)

Previously, `parseToolCall()` was using `computeAutoApproval()`, which
was a somewhat parallel implementation of `canAutoApprove()` in order to
get `SafeCommandReason` metadata for presenting information to the user.
The only function that was using `SafeCommandReason` was
`useMessageGrouping()`, but it turns out that function was unused, so
this PR removes `computeAutoApproval()` and all code related to it.

More importantly, I believe this fixes
https://github.com/openai/codex/issues/87 because
`computeAutoApproval()` was calling `parse()` from `shell-quote` without
wrapping it in a try-catch. This PR updates `canAutoApprove()` to use a
tighter try-catch block that is specific to `parse()` and returns an
appropriate `SafetyAssessment` in the event of an error, based on the
`ApprovalPolicy`.

Signed-off-by: Michael Bolin <mbolin@openai.com>
This commit is contained in:
Michael Bolin
2025-04-16 15:39:41 -07:00
committed by GitHub
parent 72d0230b33
commit fb6f798671
4 changed files with 80 additions and 239 deletions

View File

@@ -75,19 +75,72 @@ export function canAutoApprove(
writableRoots: ReadonlyArray<string>,
env: NodeJS.ProcessEnv = process.env,
): SafetyAssessment {
try {
if (command[0] === "apply_patch") {
return command.length === 2 && typeof command[1] === "string"
? canAutoApproveApplyPatch(command[1], writableRoots, policy)
: {
type: "reject",
reason: "Invalid apply_patch command",
};
if (command[0] === "apply_patch") {
return command.length === 2 && typeof command[1] === "string"
? canAutoApproveApplyPatch(command[1], writableRoots, policy)
: {
type: "reject",
reason: "Invalid apply_patch command",
};
}
const isSafe = isSafeCommand(command);
if (isSafe != null) {
const { reason, group } = isSafe;
return {
type: "auto-approve",
reason,
group,
runInSandbox: false,
};
}
if (
command[0] === "bash" &&
command[1] === "-lc" &&
typeof command[2] === "string" &&
command.length === 3
) {
const applyPatchArg = tryParseApplyPatch(command[2]);
if (applyPatchArg != null) {
return canAutoApproveApplyPatch(applyPatchArg, writableRoots, policy);
}
const isSafe = isSafeCommand(command);
if (isSafe != null) {
const { reason, group } = isSafe;
let bashCmd;
try {
bashCmd = parse(command[2], env);
} catch (e) {
// In practice, there seem to be syntactically valid shell commands that
// shell-quote cannot parse, so we should not reject, but ask the user.
switch (policy) {
case "full-auto":
// In full-auto, we still run the command automatically, but must
// restrict it to the sandbox.
return {
type: "auto-approve",
reason: "Full auto mode",
group: "Running commands",
runInSandbox: true,
};
case "suggest":
case "auto-edit":
// In all other modes, since we cannot reason about the command, we
// should ask the user.
return {
type: "ask-user",
};
}
}
// bashCmd could be a mix of strings and operators, e.g.:
// "ls || (true && pwd)" => [ 'ls', { op: '||' }, '(', 'true', { op: '&&' }, 'pwd', ')' ]
// We try to ensure that *every* command segment is deemed safe and that
// all operators belong to an allowlist. If so, the entire expression is
// considered autoapprovable.
const shellSafe = isEntireShellExpressionSafe(bashCmd);
if (shellSafe != null) {
const { reason, group } = shellSafe;
return {
type: "auto-approve",
reason,
@@ -95,58 +148,16 @@ export function canAutoApprove(
runInSandbox: false,
};
}
}
if (
command[0] === "bash" &&
command[1] === "-lc" &&
typeof command[2] === "string" &&
command.length === 3
) {
const applyPatchArg = tryParseApplyPatch(command[2]);
if (applyPatchArg != null) {
return canAutoApproveApplyPatch(applyPatchArg, writableRoots, policy);
}
const bashCmd = parse(command[2], env);
// bashCmd could be a mix of strings and operators, e.g.:
// "ls || (true && pwd)" => [ 'ls', { op: '||' }, '(', 'true', { op: '&&' }, 'pwd', ')' ]
// We try to ensure that *every* command segment is deemed safe and that
// all operators belong to an allowlist. If so, the entire expression is
// considered autoapprovable.
const shellSafe = isEntireShellExpressionSafe(bashCmd);
if (shellSafe != null) {
const { reason, group } = shellSafe;
return {
type: "auto-approve",
reason,
group,
runInSandbox: false,
};
}
}
return policy === "full-auto"
? {
type: "auto-approve",
reason: "Full auto mode",
group: "Running commands",
runInSandbox: true,
}
: { type: "ask-user" };
} catch (err) {
if (policy === "full-auto") {
return {
return policy === "full-auto"
? {
type: "auto-approve",
reason: "Full auto mode",
group: "Running commands",
runInSandbox: true,
};
} else {
return { type: "ask-user" };
}
}
}
: { type: "ask-user" };
}
function canAutoApproveApplyPatch(

View File

@@ -1,8 +1,5 @@
import type { ResponseItem } from "openai/resources/responses/responses.mjs";
import { parseToolCall } from "../../utils/parsers.js";
import { useMemo } from "react";
/**
* Represents a grouped sequence of response items (e.g., function call batches).
*/
@@ -10,72 +7,3 @@ export type GroupedResponseItem = {
label: string;
items: Array<ResponseItem>;
};
/**
* Custom hook to group recent response items for display batching.
* Returns counts of auto-approved tool call groups, the latest batch,
* and the count of user messages in the visible window.
*/
export function useMessageGrouping(visibleItems: Array<ResponseItem>): {
groupCounts: Record<string, number>;
batch: Array<{ item?: ResponseItem; group?: GroupedResponseItem }>;
userMsgCount: number;
} {
return useMemo(() => {
// The grouping logic only depends on the subset of messages that are
// currently rendered (visibleItems). Using that as the sole dependency
// keeps recomputations to a minimum and avoids unnecessary work when the
// full list of `items` changes outside of the visible window.
let userMsgCount = 0;
const groupCounts: Record<string, number> = {};
visibleItems.forEach((m) => {
if (m.type === "function_call") {
const toolCall = parseToolCall(m);
if (toolCall?.autoApproval) {
const group = toolCall.autoApproval.group;
groupCounts[group] = (groupCounts[group] || 0) + 1;
}
}
if (m.type === "message" && m.role === "user") {
userMsgCount++;
}
});
const lastFew = visibleItems.slice(-3);
const batch: Array<{ item?: ResponseItem; group?: GroupedResponseItem }> =
[];
if (lastFew[0]?.type === "function_call") {
const toolCall = parseToolCall(lastFew[0]);
batch.push({
group: {
label: toolCall?.autoApproval?.group || "Running command",
items: lastFew,
},
});
if (lastFew[2]?.type === "message") {
batch.push({ item: lastFew[2] });
}
} else if (lastFew[1]?.type === "function_call") {
const toolCall = parseToolCall(lastFew[1]);
batch.push({
group: {
label: toolCall?.autoApproval?.group || "Running command",
items: lastFew.slice(1),
},
});
} else if (lastFew[2]?.type === "function_call") {
const toolCall = parseToolCall(lastFew[2]);
batch.push({
group: {
label: toolCall?.autoApproval?.group || "Running command",
items: [lastFew[2]],
},
});
} else {
lastFew.forEach((item) => batch.push({ item }));
}
return { groupCounts, batch, userMsgCount };
// `items` is stable across renders while `visibleItems` changes based on
// the scroll window. Including only `visibleItems` avoids unnecessary
// recomputations while still producing correct results.
}, [visibleItems]);
}

View File

@@ -1,11 +1,3 @@
import type { SafeCommandReason } from "../../approvals";
export type CommandReviewDetails = {
cmd: Array<string>;
cmdReadableText: string;
autoApproval: SafeCommandReason | null;
};
export enum ReviewDecision {
YES = "yes",
NO_CONTINUE = "no-continue",

View File

@@ -1,30 +1,16 @@
import type { CommandReviewDetails } from "./agent/review.js";
import type {
ExecInput,
ExecOutputMetadata,
} from "./agent/sandbox/interface.js";
import type { ResponseFunctionToolCall } from "openai/resources/responses/responses.mjs";
import { isSafeCommand, type SafeCommandReason } from "../approvals.js";
import { log } from "node:console";
import process from "process";
import { parse } from "shell-quote";
import { formatCommandForDisplay } from "src/format-command.js";
// The console utility import is intentionally explicit to avoid bundlers from
// including the entire `console` module when only the `log` function is
// required.
// Allowed shell operators that we consider "safe" as they do not introduce
// sideeffects on their own (unlike redirections). Parentheses and braces for
// grouping are excluded for simplicity.
const SAFE_SHELL_OPERATORS: ReadonlySet<string> = new Set([
"&&",
"||",
"|",
";",
]);
export function parseToolCallOutput(toolCallOutput: string): {
output: string;
metadata: ExecOutputMetadata;
@@ -46,6 +32,17 @@ export function parseToolCallOutput(toolCallOutput: string): {
}
}
export type CommandReviewDetails = {
cmd: Array<string>;
cmdReadableText: string;
};
/**
* Tries to parse a tool call and, if successful, returns an object that has
* both:
* - an array of strings to use with `ExecInput` and `canAutoApprove()`
* - a human-readable string to display to the user
*/
export function parseToolCall(
toolCall: ResponseFunctionToolCall,
): CommandReviewDetails | undefined {
@@ -57,12 +54,9 @@ export function parseToolCall(
const { cmd } = toolCallArgs;
const cmdReadableText = formatCommandForDisplay(cmd);
const autoApproval = computeAutoApproval(cmd);
return {
cmd,
cmdReadableText,
autoApproval,
};
}
@@ -109,87 +103,3 @@ function toStringArray(obj: unknown): Array<string> | undefined {
return undefined;
}
}
// ---------------- safecommand helpers ----------------
/**
* Attempts to determine whether `cmd` is composed exclusively of safe
* subcommands combined using only operators from the SAFE_SHELL_OPERATORS
* allowlist. Returns the `SafeCommandReason` (taken from the first subcommand)
* if the whole expression is safe; otherwise returns `null`.
*/
function computeAutoApproval(cmd: Array<string>): SafeCommandReason | null {
// Fast path: a simple command with no shell processing.
const direct = isSafeCommand(cmd);
if (direct != null) {
return direct;
}
// For expressions like ["bash", "-lc", "ls && pwd"] break down the inner
// string and verify each segment.
if (
cmd.length === 3 &&
cmd[0] === "bash" &&
cmd[1] === "-lc" &&
typeof cmd[2] === "string"
) {
const parsed = parse(cmd[2], process.env ?? {});
if (parsed.length === 0) {
return null;
}
let current: Array<string> = [];
let first: SafeCommandReason | null = null;
const flush = (): boolean => {
if (current.length === 0) {
return true;
}
const safe = isSafeCommand(current);
if (safe == null) {
return false;
}
if (!first) {
first = safe;
}
current = [];
return true;
};
for (const part of parsed) {
if (typeof part === "string") {
// Simple word/argument token.
if (part === "(" || part === ")" || part === "{" || part === "}") {
// We treat explicit grouping tokens as unsafe because their
// semantics depend on the shell evaluation environment.
return null;
}
current.push(part);
} else if (part && typeof part === "object") {
const opToken = part as { op?: string };
if (typeof opToken.op === "string") {
if (!flush()) {
return null;
}
if (!SAFE_SHELL_OPERATORS.has(opToken.op)) {
return null;
}
} else {
// Unknown object token kind (e.g. redirection) treat as unsafe.
return null;
}
} else {
// Token types such as numbers / booleans are unexpected treat as unsafe.
return null;
}
}
if (!flush()) {
return null;
}
return first;
}
return null;
}