Removes computeAutoApproval() and tightens up canAutoApprove() as the source of truth (#126)
Previously, `parseToolCall()` was using `computeAutoApproval()`, which was a somewhat parallel implementation of `canAutoApprove()` in order to get `SafeCommandReason` metadata for presenting information to the user. The only function that was using `SafeCommandReason` was `useMessageGrouping()`, but it turns out that function was unused, so this PR removes `computeAutoApproval()` and all code related to it. More importantly, I believe this fixes https://github.com/openai/codex/issues/87 because `computeAutoApproval()` was calling `parse()` from `shell-quote` without wrapping it in a try-catch. This PR updates `canAutoApprove()` to use a tighter try-catch block that is specific to `parse()` and returns an appropriate `SafetyAssessment` in the event of an error, based on the `ApprovalPolicy`. Signed-off-by: Michael Bolin <mbolin@openai.com>
This commit is contained in:
@@ -75,19 +75,72 @@ export function canAutoApprove(
|
|||||||
writableRoots: ReadonlyArray<string>,
|
writableRoots: ReadonlyArray<string>,
|
||||||
env: NodeJS.ProcessEnv = process.env,
|
env: NodeJS.ProcessEnv = process.env,
|
||||||
): SafetyAssessment {
|
): SafetyAssessment {
|
||||||
try {
|
if (command[0] === "apply_patch") {
|
||||||
if (command[0] === "apply_patch") {
|
return command.length === 2 && typeof command[1] === "string"
|
||||||
return command.length === 2 && typeof command[1] === "string"
|
? canAutoApproveApplyPatch(command[1], writableRoots, policy)
|
||||||
? canAutoApproveApplyPatch(command[1], writableRoots, policy)
|
: {
|
||||||
: {
|
type: "reject",
|
||||||
type: "reject",
|
reason: "Invalid apply_patch command",
|
||||||
reason: "Invalid apply_patch command",
|
};
|
||||||
};
|
}
|
||||||
|
|
||||||
|
const isSafe = isSafeCommand(command);
|
||||||
|
if (isSafe != null) {
|
||||||
|
const { reason, group } = isSafe;
|
||||||
|
return {
|
||||||
|
type: "auto-approve",
|
||||||
|
reason,
|
||||||
|
group,
|
||||||
|
runInSandbox: false,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
if (
|
||||||
|
command[0] === "bash" &&
|
||||||
|
command[1] === "-lc" &&
|
||||||
|
typeof command[2] === "string" &&
|
||||||
|
command.length === 3
|
||||||
|
) {
|
||||||
|
const applyPatchArg = tryParseApplyPatch(command[2]);
|
||||||
|
if (applyPatchArg != null) {
|
||||||
|
return canAutoApproveApplyPatch(applyPatchArg, writableRoots, policy);
|
||||||
}
|
}
|
||||||
|
|
||||||
const isSafe = isSafeCommand(command);
|
let bashCmd;
|
||||||
if (isSafe != null) {
|
try {
|
||||||
const { reason, group } = isSafe;
|
bashCmd = parse(command[2], env);
|
||||||
|
} catch (e) {
|
||||||
|
// In practice, there seem to be syntactically valid shell commands that
|
||||||
|
// shell-quote cannot parse, so we should not reject, but ask the user.
|
||||||
|
switch (policy) {
|
||||||
|
case "full-auto":
|
||||||
|
// In full-auto, we still run the command automatically, but must
|
||||||
|
// restrict it to the sandbox.
|
||||||
|
return {
|
||||||
|
type: "auto-approve",
|
||||||
|
reason: "Full auto mode",
|
||||||
|
group: "Running commands",
|
||||||
|
runInSandbox: true,
|
||||||
|
};
|
||||||
|
case "suggest":
|
||||||
|
case "auto-edit":
|
||||||
|
// In all other modes, since we cannot reason about the command, we
|
||||||
|
// should ask the user.
|
||||||
|
return {
|
||||||
|
type: "ask-user",
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// bashCmd could be a mix of strings and operators, e.g.:
|
||||||
|
// "ls || (true && pwd)" => [ 'ls', { op: '||' }, '(', 'true', { op: '&&' }, 'pwd', ')' ]
|
||||||
|
// We try to ensure that *every* command segment is deemed safe and that
|
||||||
|
// all operators belong to an allow‑list. If so, the entire expression is
|
||||||
|
// considered auto‑approvable.
|
||||||
|
|
||||||
|
const shellSafe = isEntireShellExpressionSafe(bashCmd);
|
||||||
|
if (shellSafe != null) {
|
||||||
|
const { reason, group } = shellSafe;
|
||||||
return {
|
return {
|
||||||
type: "auto-approve",
|
type: "auto-approve",
|
||||||
reason,
|
reason,
|
||||||
@@ -95,58 +148,16 @@ export function canAutoApprove(
|
|||||||
runInSandbox: false,
|
runInSandbox: false,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (
|
return policy === "full-auto"
|
||||||
command[0] === "bash" &&
|
? {
|
||||||
command[1] === "-lc" &&
|
|
||||||
typeof command[2] === "string" &&
|
|
||||||
command.length === 3
|
|
||||||
) {
|
|
||||||
const applyPatchArg = tryParseApplyPatch(command[2]);
|
|
||||||
if (applyPatchArg != null) {
|
|
||||||
return canAutoApproveApplyPatch(applyPatchArg, writableRoots, policy);
|
|
||||||
}
|
|
||||||
|
|
||||||
const bashCmd = parse(command[2], env);
|
|
||||||
|
|
||||||
// bashCmd could be a mix of strings and operators, e.g.:
|
|
||||||
// "ls || (true && pwd)" => [ 'ls', { op: '||' }, '(', 'true', { op: '&&' }, 'pwd', ')' ]
|
|
||||||
// We try to ensure that *every* command segment is deemed safe and that
|
|
||||||
// all operators belong to an allow‑list. If so, the entire expression is
|
|
||||||
// considered auto‑approvable.
|
|
||||||
|
|
||||||
const shellSafe = isEntireShellExpressionSafe(bashCmd);
|
|
||||||
if (shellSafe != null) {
|
|
||||||
const { reason, group } = shellSafe;
|
|
||||||
return {
|
|
||||||
type: "auto-approve",
|
|
||||||
reason,
|
|
||||||
group,
|
|
||||||
runInSandbox: false,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return policy === "full-auto"
|
|
||||||
? {
|
|
||||||
type: "auto-approve",
|
|
||||||
reason: "Full auto mode",
|
|
||||||
group: "Running commands",
|
|
||||||
runInSandbox: true,
|
|
||||||
}
|
|
||||||
: { type: "ask-user" };
|
|
||||||
} catch (err) {
|
|
||||||
if (policy === "full-auto") {
|
|
||||||
return {
|
|
||||||
type: "auto-approve",
|
type: "auto-approve",
|
||||||
reason: "Full auto mode",
|
reason: "Full auto mode",
|
||||||
group: "Running commands",
|
group: "Running commands",
|
||||||
runInSandbox: true,
|
runInSandbox: true,
|
||||||
};
|
}
|
||||||
} else {
|
: { type: "ask-user" };
|
||||||
return { type: "ask-user" };
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function canAutoApproveApplyPatch(
|
function canAutoApproveApplyPatch(
|
||||||
|
|||||||
@@ -1,8 +1,5 @@
|
|||||||
import type { ResponseItem } from "openai/resources/responses/responses.mjs";
|
import type { ResponseItem } from "openai/resources/responses/responses.mjs";
|
||||||
|
|
||||||
import { parseToolCall } from "../../utils/parsers.js";
|
|
||||||
import { useMemo } from "react";
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Represents a grouped sequence of response items (e.g., function call batches).
|
* Represents a grouped sequence of response items (e.g., function call batches).
|
||||||
*/
|
*/
|
||||||
@@ -10,72 +7,3 @@ export type GroupedResponseItem = {
|
|||||||
label: string;
|
label: string;
|
||||||
items: Array<ResponseItem>;
|
items: Array<ResponseItem>;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
|
||||||
* Custom hook to group recent response items for display batching.
|
|
||||||
* Returns counts of auto-approved tool call groups, the latest batch,
|
|
||||||
* and the count of user messages in the visible window.
|
|
||||||
*/
|
|
||||||
export function useMessageGrouping(visibleItems: Array<ResponseItem>): {
|
|
||||||
groupCounts: Record<string, number>;
|
|
||||||
batch: Array<{ item?: ResponseItem; group?: GroupedResponseItem }>;
|
|
||||||
userMsgCount: number;
|
|
||||||
} {
|
|
||||||
return useMemo(() => {
|
|
||||||
// The grouping logic only depends on the subset of messages that are
|
|
||||||
// currently rendered (visibleItems). Using that as the sole dependency
|
|
||||||
// keeps recomputations to a minimum and avoids unnecessary work when the
|
|
||||||
// full list of `items` changes outside of the visible window.
|
|
||||||
let userMsgCount = 0;
|
|
||||||
const groupCounts: Record<string, number> = {};
|
|
||||||
visibleItems.forEach((m) => {
|
|
||||||
if (m.type === "function_call") {
|
|
||||||
const toolCall = parseToolCall(m);
|
|
||||||
if (toolCall?.autoApproval) {
|
|
||||||
const group = toolCall.autoApproval.group;
|
|
||||||
groupCounts[group] = (groupCounts[group] || 0) + 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (m.type === "message" && m.role === "user") {
|
|
||||||
userMsgCount++;
|
|
||||||
}
|
|
||||||
});
|
|
||||||
const lastFew = visibleItems.slice(-3);
|
|
||||||
const batch: Array<{ item?: ResponseItem; group?: GroupedResponseItem }> =
|
|
||||||
[];
|
|
||||||
if (lastFew[0]?.type === "function_call") {
|
|
||||||
const toolCall = parseToolCall(lastFew[0]);
|
|
||||||
batch.push({
|
|
||||||
group: {
|
|
||||||
label: toolCall?.autoApproval?.group || "Running command",
|
|
||||||
items: lastFew,
|
|
||||||
},
|
|
||||||
});
|
|
||||||
if (lastFew[2]?.type === "message") {
|
|
||||||
batch.push({ item: lastFew[2] });
|
|
||||||
}
|
|
||||||
} else if (lastFew[1]?.type === "function_call") {
|
|
||||||
const toolCall = parseToolCall(lastFew[1]);
|
|
||||||
batch.push({
|
|
||||||
group: {
|
|
||||||
label: toolCall?.autoApproval?.group || "Running command",
|
|
||||||
items: lastFew.slice(1),
|
|
||||||
},
|
|
||||||
});
|
|
||||||
} else if (lastFew[2]?.type === "function_call") {
|
|
||||||
const toolCall = parseToolCall(lastFew[2]);
|
|
||||||
batch.push({
|
|
||||||
group: {
|
|
||||||
label: toolCall?.autoApproval?.group || "Running command",
|
|
||||||
items: [lastFew[2]],
|
|
||||||
},
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
lastFew.forEach((item) => batch.push({ item }));
|
|
||||||
}
|
|
||||||
return { groupCounts, batch, userMsgCount };
|
|
||||||
// `items` is stable across renders while `visibleItems` changes based on
|
|
||||||
// the scroll window. Including only `visibleItems` avoids unnecessary
|
|
||||||
// recomputations while still producing correct results.
|
|
||||||
}, [visibleItems]);
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -1,11 +1,3 @@
|
|||||||
import type { SafeCommandReason } from "../../approvals";
|
|
||||||
|
|
||||||
export type CommandReviewDetails = {
|
|
||||||
cmd: Array<string>;
|
|
||||||
cmdReadableText: string;
|
|
||||||
autoApproval: SafeCommandReason | null;
|
|
||||||
};
|
|
||||||
|
|
||||||
export enum ReviewDecision {
|
export enum ReviewDecision {
|
||||||
YES = "yes",
|
YES = "yes",
|
||||||
NO_CONTINUE = "no-continue",
|
NO_CONTINUE = "no-continue",
|
||||||
|
|||||||
@@ -1,30 +1,16 @@
|
|||||||
import type { CommandReviewDetails } from "./agent/review.js";
|
|
||||||
import type {
|
import type {
|
||||||
ExecInput,
|
ExecInput,
|
||||||
ExecOutputMetadata,
|
ExecOutputMetadata,
|
||||||
} from "./agent/sandbox/interface.js";
|
} from "./agent/sandbox/interface.js";
|
||||||
import type { ResponseFunctionToolCall } from "openai/resources/responses/responses.mjs";
|
import type { ResponseFunctionToolCall } from "openai/resources/responses/responses.mjs";
|
||||||
|
|
||||||
import { isSafeCommand, type SafeCommandReason } from "../approvals.js";
|
|
||||||
import { log } from "node:console";
|
import { log } from "node:console";
|
||||||
import process from "process";
|
|
||||||
import { parse } from "shell-quote";
|
|
||||||
import { formatCommandForDisplay } from "src/format-command.js";
|
import { formatCommandForDisplay } from "src/format-command.js";
|
||||||
|
|
||||||
// The console utility import is intentionally explicit to avoid bundlers from
|
// The console utility import is intentionally explicit to avoid bundlers from
|
||||||
// including the entire `console` module when only the `log` function is
|
// including the entire `console` module when only the `log` function is
|
||||||
// required.
|
// required.
|
||||||
|
|
||||||
// Allowed shell operators that we consider "safe" as they do not introduce
|
|
||||||
// side‑effects on their own (unlike redirections). Parentheses and braces for
|
|
||||||
// grouping are excluded for simplicity.
|
|
||||||
const SAFE_SHELL_OPERATORS: ReadonlySet<string> = new Set([
|
|
||||||
"&&",
|
|
||||||
"||",
|
|
||||||
"|",
|
|
||||||
";",
|
|
||||||
]);
|
|
||||||
|
|
||||||
export function parseToolCallOutput(toolCallOutput: string): {
|
export function parseToolCallOutput(toolCallOutput: string): {
|
||||||
output: string;
|
output: string;
|
||||||
metadata: ExecOutputMetadata;
|
metadata: ExecOutputMetadata;
|
||||||
@@ -46,6 +32,17 @@ export function parseToolCallOutput(toolCallOutput: string): {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export type CommandReviewDetails = {
|
||||||
|
cmd: Array<string>;
|
||||||
|
cmdReadableText: string;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tries to parse a tool call and, if successful, returns an object that has
|
||||||
|
* both:
|
||||||
|
* - an array of strings to use with `ExecInput` and `canAutoApprove()`
|
||||||
|
* - a human-readable string to display to the user
|
||||||
|
*/
|
||||||
export function parseToolCall(
|
export function parseToolCall(
|
||||||
toolCall: ResponseFunctionToolCall,
|
toolCall: ResponseFunctionToolCall,
|
||||||
): CommandReviewDetails | undefined {
|
): CommandReviewDetails | undefined {
|
||||||
@@ -57,12 +54,9 @@ export function parseToolCall(
|
|||||||
const { cmd } = toolCallArgs;
|
const { cmd } = toolCallArgs;
|
||||||
const cmdReadableText = formatCommandForDisplay(cmd);
|
const cmdReadableText = formatCommandForDisplay(cmd);
|
||||||
|
|
||||||
const autoApproval = computeAutoApproval(cmd);
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
cmd,
|
cmd,
|
||||||
cmdReadableText,
|
cmdReadableText,
|
||||||
autoApproval,
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -109,87 +103,3 @@ function toStringArray(obj: unknown): Array<string> | undefined {
|
|||||||
return undefined;
|
return undefined;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---------------- safe‑command helpers ----------------
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Attempts to determine whether `cmd` is composed exclusively of safe
|
|
||||||
* sub‑commands combined using only operators from the SAFE_SHELL_OPERATORS
|
|
||||||
* allow‑list. Returns the `SafeCommandReason` (taken from the first sub‑command)
|
|
||||||
* if the whole expression is safe; otherwise returns `null`.
|
|
||||||
*/
|
|
||||||
function computeAutoApproval(cmd: Array<string>): SafeCommandReason | null {
|
|
||||||
// Fast path: a simple command with no shell processing.
|
|
||||||
const direct = isSafeCommand(cmd);
|
|
||||||
if (direct != null) {
|
|
||||||
return direct;
|
|
||||||
}
|
|
||||||
|
|
||||||
// For expressions like ["bash", "-lc", "ls && pwd"] break down the inner
|
|
||||||
// string and verify each segment.
|
|
||||||
if (
|
|
||||||
cmd.length === 3 &&
|
|
||||||
cmd[0] === "bash" &&
|
|
||||||
cmd[1] === "-lc" &&
|
|
||||||
typeof cmd[2] === "string"
|
|
||||||
) {
|
|
||||||
const parsed = parse(cmd[2], process.env ?? {});
|
|
||||||
if (parsed.length === 0) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
let current: Array<string> = [];
|
|
||||||
let first: SafeCommandReason | null = null;
|
|
||||||
|
|
||||||
const flush = (): boolean => {
|
|
||||||
if (current.length === 0) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
const safe = isSafeCommand(current);
|
|
||||||
if (safe == null) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (!first) {
|
|
||||||
first = safe;
|
|
||||||
}
|
|
||||||
current = [];
|
|
||||||
return true;
|
|
||||||
};
|
|
||||||
|
|
||||||
for (const part of parsed) {
|
|
||||||
if (typeof part === "string") {
|
|
||||||
// Simple word/argument token.
|
|
||||||
if (part === "(" || part === ")" || part === "{" || part === "}") {
|
|
||||||
// We treat explicit grouping tokens as unsafe because their
|
|
||||||
// semantics depend on the shell evaluation environment.
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
current.push(part);
|
|
||||||
} else if (part && typeof part === "object") {
|
|
||||||
const opToken = part as { op?: string };
|
|
||||||
if (typeof opToken.op === "string") {
|
|
||||||
if (!flush()) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
if (!SAFE_SHELL_OPERATORS.has(opToken.op)) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Unknown object token kind (e.g. redirection) – treat as unsafe.
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Token types such as numbers / booleans are unexpected – treat as unsafe.
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!flush()) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
return first;
|
|
||||||
}
|
|
||||||
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|||||||
Reference in New Issue
Block a user