Removes computeAutoApproval() and tightens up canAutoApprove() as the source of truth (#126)
Previously, `parseToolCall()` was using `computeAutoApproval()`, which was a somewhat parallel implementation of `canAutoApprove()` in order to get `SafeCommandReason` metadata for presenting information to the user. The only function that was using `SafeCommandReason` was `useMessageGrouping()`, but it turns out that function was unused, so this PR removes `computeAutoApproval()` and all code related to it. More importantly, I believe this fixes https://github.com/openai/codex/issues/87 because `computeAutoApproval()` was calling `parse()` from `shell-quote` without wrapping it in a try-catch. This PR updates `canAutoApprove()` to use a tighter try-catch block that is specific to `parse()` and returns an appropriate `SafetyAssessment` in the event of an error, based on the `ApprovalPolicy`. Signed-off-by: Michael Bolin <mbolin@openai.com>
This commit is contained in:
@@ -75,19 +75,72 @@ export function canAutoApprove(
|
||||
writableRoots: ReadonlyArray<string>,
|
||||
env: NodeJS.ProcessEnv = process.env,
|
||||
): SafetyAssessment {
|
||||
try {
|
||||
if (command[0] === "apply_patch") {
|
||||
return command.length === 2 && typeof command[1] === "string"
|
||||
? canAutoApproveApplyPatch(command[1], writableRoots, policy)
|
||||
: {
|
||||
type: "reject",
|
||||
reason: "Invalid apply_patch command",
|
||||
};
|
||||
if (command[0] === "apply_patch") {
|
||||
return command.length === 2 && typeof command[1] === "string"
|
||||
? canAutoApproveApplyPatch(command[1], writableRoots, policy)
|
||||
: {
|
||||
type: "reject",
|
||||
reason: "Invalid apply_patch command",
|
||||
};
|
||||
}
|
||||
|
||||
const isSafe = isSafeCommand(command);
|
||||
if (isSafe != null) {
|
||||
const { reason, group } = isSafe;
|
||||
return {
|
||||
type: "auto-approve",
|
||||
reason,
|
||||
group,
|
||||
runInSandbox: false,
|
||||
};
|
||||
}
|
||||
|
||||
if (
|
||||
command[0] === "bash" &&
|
||||
command[1] === "-lc" &&
|
||||
typeof command[2] === "string" &&
|
||||
command.length === 3
|
||||
) {
|
||||
const applyPatchArg = tryParseApplyPatch(command[2]);
|
||||
if (applyPatchArg != null) {
|
||||
return canAutoApproveApplyPatch(applyPatchArg, writableRoots, policy);
|
||||
}
|
||||
|
||||
const isSafe = isSafeCommand(command);
|
||||
if (isSafe != null) {
|
||||
const { reason, group } = isSafe;
|
||||
let bashCmd;
|
||||
try {
|
||||
bashCmd = parse(command[2], env);
|
||||
} catch (e) {
|
||||
// In practice, there seem to be syntactically valid shell commands that
|
||||
// shell-quote cannot parse, so we should not reject, but ask the user.
|
||||
switch (policy) {
|
||||
case "full-auto":
|
||||
// In full-auto, we still run the command automatically, but must
|
||||
// restrict it to the sandbox.
|
||||
return {
|
||||
type: "auto-approve",
|
||||
reason: "Full auto mode",
|
||||
group: "Running commands",
|
||||
runInSandbox: true,
|
||||
};
|
||||
case "suggest":
|
||||
case "auto-edit":
|
||||
// In all other modes, since we cannot reason about the command, we
|
||||
// should ask the user.
|
||||
return {
|
||||
type: "ask-user",
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// bashCmd could be a mix of strings and operators, e.g.:
|
||||
// "ls || (true && pwd)" => [ 'ls', { op: '||' }, '(', 'true', { op: '&&' }, 'pwd', ')' ]
|
||||
// We try to ensure that *every* command segment is deemed safe and that
|
||||
// all operators belong to an allow‑list. If so, the entire expression is
|
||||
// considered auto‑approvable.
|
||||
|
||||
const shellSafe = isEntireShellExpressionSafe(bashCmd);
|
||||
if (shellSafe != null) {
|
||||
const { reason, group } = shellSafe;
|
||||
return {
|
||||
type: "auto-approve",
|
||||
reason,
|
||||
@@ -95,58 +148,16 @@ export function canAutoApprove(
|
||||
runInSandbox: false,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
if (
|
||||
command[0] === "bash" &&
|
||||
command[1] === "-lc" &&
|
||||
typeof command[2] === "string" &&
|
||||
command.length === 3
|
||||
) {
|
||||
const applyPatchArg = tryParseApplyPatch(command[2]);
|
||||
if (applyPatchArg != null) {
|
||||
return canAutoApproveApplyPatch(applyPatchArg, writableRoots, policy);
|
||||
}
|
||||
|
||||
const bashCmd = parse(command[2], env);
|
||||
|
||||
// bashCmd could be a mix of strings and operators, e.g.:
|
||||
// "ls || (true && pwd)" => [ 'ls', { op: '||' }, '(', 'true', { op: '&&' }, 'pwd', ')' ]
|
||||
// We try to ensure that *every* command segment is deemed safe and that
|
||||
// all operators belong to an allow‑list. If so, the entire expression is
|
||||
// considered auto‑approvable.
|
||||
|
||||
const shellSafe = isEntireShellExpressionSafe(bashCmd);
|
||||
if (shellSafe != null) {
|
||||
const { reason, group } = shellSafe;
|
||||
return {
|
||||
type: "auto-approve",
|
||||
reason,
|
||||
group,
|
||||
runInSandbox: false,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
return policy === "full-auto"
|
||||
? {
|
||||
type: "auto-approve",
|
||||
reason: "Full auto mode",
|
||||
group: "Running commands",
|
||||
runInSandbox: true,
|
||||
}
|
||||
: { type: "ask-user" };
|
||||
} catch (err) {
|
||||
if (policy === "full-auto") {
|
||||
return {
|
||||
return policy === "full-auto"
|
||||
? {
|
||||
type: "auto-approve",
|
||||
reason: "Full auto mode",
|
||||
group: "Running commands",
|
||||
runInSandbox: true,
|
||||
};
|
||||
} else {
|
||||
return { type: "ask-user" };
|
||||
}
|
||||
}
|
||||
}
|
||||
: { type: "ask-user" };
|
||||
}
|
||||
|
||||
function canAutoApproveApplyPatch(
|
||||
|
||||
@@ -1,8 +1,5 @@
|
||||
import type { ResponseItem } from "openai/resources/responses/responses.mjs";
|
||||
|
||||
import { parseToolCall } from "../../utils/parsers.js";
|
||||
import { useMemo } from "react";
|
||||
|
||||
/**
|
||||
* Represents a grouped sequence of response items (e.g., function call batches).
|
||||
*/
|
||||
@@ -10,72 +7,3 @@ export type GroupedResponseItem = {
|
||||
label: string;
|
||||
items: Array<ResponseItem>;
|
||||
};
|
||||
|
||||
/**
|
||||
* Custom hook to group recent response items for display batching.
|
||||
* Returns counts of auto-approved tool call groups, the latest batch,
|
||||
* and the count of user messages in the visible window.
|
||||
*/
|
||||
export function useMessageGrouping(visibleItems: Array<ResponseItem>): {
|
||||
groupCounts: Record<string, number>;
|
||||
batch: Array<{ item?: ResponseItem; group?: GroupedResponseItem }>;
|
||||
userMsgCount: number;
|
||||
} {
|
||||
return useMemo(() => {
|
||||
// The grouping logic only depends on the subset of messages that are
|
||||
// currently rendered (visibleItems). Using that as the sole dependency
|
||||
// keeps recomputations to a minimum and avoids unnecessary work when the
|
||||
// full list of `items` changes outside of the visible window.
|
||||
let userMsgCount = 0;
|
||||
const groupCounts: Record<string, number> = {};
|
||||
visibleItems.forEach((m) => {
|
||||
if (m.type === "function_call") {
|
||||
const toolCall = parseToolCall(m);
|
||||
if (toolCall?.autoApproval) {
|
||||
const group = toolCall.autoApproval.group;
|
||||
groupCounts[group] = (groupCounts[group] || 0) + 1;
|
||||
}
|
||||
}
|
||||
if (m.type === "message" && m.role === "user") {
|
||||
userMsgCount++;
|
||||
}
|
||||
});
|
||||
const lastFew = visibleItems.slice(-3);
|
||||
const batch: Array<{ item?: ResponseItem; group?: GroupedResponseItem }> =
|
||||
[];
|
||||
if (lastFew[0]?.type === "function_call") {
|
||||
const toolCall = parseToolCall(lastFew[0]);
|
||||
batch.push({
|
||||
group: {
|
||||
label: toolCall?.autoApproval?.group || "Running command",
|
||||
items: lastFew,
|
||||
},
|
||||
});
|
||||
if (lastFew[2]?.type === "message") {
|
||||
batch.push({ item: lastFew[2] });
|
||||
}
|
||||
} else if (lastFew[1]?.type === "function_call") {
|
||||
const toolCall = parseToolCall(lastFew[1]);
|
||||
batch.push({
|
||||
group: {
|
||||
label: toolCall?.autoApproval?.group || "Running command",
|
||||
items: lastFew.slice(1),
|
||||
},
|
||||
});
|
||||
} else if (lastFew[2]?.type === "function_call") {
|
||||
const toolCall = parseToolCall(lastFew[2]);
|
||||
batch.push({
|
||||
group: {
|
||||
label: toolCall?.autoApproval?.group || "Running command",
|
||||
items: [lastFew[2]],
|
||||
},
|
||||
});
|
||||
} else {
|
||||
lastFew.forEach((item) => batch.push({ item }));
|
||||
}
|
||||
return { groupCounts, batch, userMsgCount };
|
||||
// `items` is stable across renders while `visibleItems` changes based on
|
||||
// the scroll window. Including only `visibleItems` avoids unnecessary
|
||||
// recomputations while still producing correct results.
|
||||
}, [visibleItems]);
|
||||
}
|
||||
|
||||
@@ -1,11 +1,3 @@
|
||||
import type { SafeCommandReason } from "../../approvals";
|
||||
|
||||
export type CommandReviewDetails = {
|
||||
cmd: Array<string>;
|
||||
cmdReadableText: string;
|
||||
autoApproval: SafeCommandReason | null;
|
||||
};
|
||||
|
||||
export enum ReviewDecision {
|
||||
YES = "yes",
|
||||
NO_CONTINUE = "no-continue",
|
||||
|
||||
@@ -1,30 +1,16 @@
|
||||
import type { CommandReviewDetails } from "./agent/review.js";
|
||||
import type {
|
||||
ExecInput,
|
||||
ExecOutputMetadata,
|
||||
} from "./agent/sandbox/interface.js";
|
||||
import type { ResponseFunctionToolCall } from "openai/resources/responses/responses.mjs";
|
||||
|
||||
import { isSafeCommand, type SafeCommandReason } from "../approvals.js";
|
||||
import { log } from "node:console";
|
||||
import process from "process";
|
||||
import { parse } from "shell-quote";
|
||||
import { formatCommandForDisplay } from "src/format-command.js";
|
||||
|
||||
// The console utility import is intentionally explicit to avoid bundlers from
|
||||
// including the entire `console` module when only the `log` function is
|
||||
// required.
|
||||
|
||||
// Allowed shell operators that we consider "safe" as they do not introduce
|
||||
// side‑effects on their own (unlike redirections). Parentheses and braces for
|
||||
// grouping are excluded for simplicity.
|
||||
const SAFE_SHELL_OPERATORS: ReadonlySet<string> = new Set([
|
||||
"&&",
|
||||
"||",
|
||||
"|",
|
||||
";",
|
||||
]);
|
||||
|
||||
export function parseToolCallOutput(toolCallOutput: string): {
|
||||
output: string;
|
||||
metadata: ExecOutputMetadata;
|
||||
@@ -46,6 +32,17 @@ export function parseToolCallOutput(toolCallOutput: string): {
|
||||
}
|
||||
}
|
||||
|
||||
export type CommandReviewDetails = {
|
||||
cmd: Array<string>;
|
||||
cmdReadableText: string;
|
||||
};
|
||||
|
||||
/**
|
||||
* Tries to parse a tool call and, if successful, returns an object that has
|
||||
* both:
|
||||
* - an array of strings to use with `ExecInput` and `canAutoApprove()`
|
||||
* - a human-readable string to display to the user
|
||||
*/
|
||||
export function parseToolCall(
|
||||
toolCall: ResponseFunctionToolCall,
|
||||
): CommandReviewDetails | undefined {
|
||||
@@ -57,12 +54,9 @@ export function parseToolCall(
|
||||
const { cmd } = toolCallArgs;
|
||||
const cmdReadableText = formatCommandForDisplay(cmd);
|
||||
|
||||
const autoApproval = computeAutoApproval(cmd);
|
||||
|
||||
return {
|
||||
cmd,
|
||||
cmdReadableText,
|
||||
autoApproval,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -109,87 +103,3 @@ function toStringArray(obj: unknown): Array<string> | undefined {
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------- safe‑command helpers ----------------
|
||||
|
||||
/**
|
||||
* Attempts to determine whether `cmd` is composed exclusively of safe
|
||||
* sub‑commands combined using only operators from the SAFE_SHELL_OPERATORS
|
||||
* allow‑list. Returns the `SafeCommandReason` (taken from the first sub‑command)
|
||||
* if the whole expression is safe; otherwise returns `null`.
|
||||
*/
|
||||
function computeAutoApproval(cmd: Array<string>): SafeCommandReason | null {
|
||||
// Fast path: a simple command with no shell processing.
|
||||
const direct = isSafeCommand(cmd);
|
||||
if (direct != null) {
|
||||
return direct;
|
||||
}
|
||||
|
||||
// For expressions like ["bash", "-lc", "ls && pwd"] break down the inner
|
||||
// string and verify each segment.
|
||||
if (
|
||||
cmd.length === 3 &&
|
||||
cmd[0] === "bash" &&
|
||||
cmd[1] === "-lc" &&
|
||||
typeof cmd[2] === "string"
|
||||
) {
|
||||
const parsed = parse(cmd[2], process.env ?? {});
|
||||
if (parsed.length === 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
let current: Array<string> = [];
|
||||
let first: SafeCommandReason | null = null;
|
||||
|
||||
const flush = (): boolean => {
|
||||
if (current.length === 0) {
|
||||
return true;
|
||||
}
|
||||
const safe = isSafeCommand(current);
|
||||
if (safe == null) {
|
||||
return false;
|
||||
}
|
||||
if (!first) {
|
||||
first = safe;
|
||||
}
|
||||
current = [];
|
||||
return true;
|
||||
};
|
||||
|
||||
for (const part of parsed) {
|
||||
if (typeof part === "string") {
|
||||
// Simple word/argument token.
|
||||
if (part === "(" || part === ")" || part === "{" || part === "}") {
|
||||
// We treat explicit grouping tokens as unsafe because their
|
||||
// semantics depend on the shell evaluation environment.
|
||||
return null;
|
||||
}
|
||||
current.push(part);
|
||||
} else if (part && typeof part === "object") {
|
||||
const opToken = part as { op?: string };
|
||||
if (typeof opToken.op === "string") {
|
||||
if (!flush()) {
|
||||
return null;
|
||||
}
|
||||
if (!SAFE_SHELL_OPERATORS.has(opToken.op)) {
|
||||
return null;
|
||||
}
|
||||
} else {
|
||||
// Unknown object token kind (e.g. redirection) – treat as unsafe.
|
||||
return null;
|
||||
}
|
||||
} else {
|
||||
// Token types such as numbers / booleans are unexpected – treat as unsafe.
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
if (!flush()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return first;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user