diff --git a/codex-rs/common/src/approval_mode_cli_arg.rs b/codex-rs/common/src/approval_mode_cli_arg.rs index a74ceb2b..e8c06826 100644 --- a/codex-rs/common/src/approval_mode_cli_arg.rs +++ b/codex-rs/common/src/approval_mode_cli_arg.rs @@ -18,6 +18,9 @@ pub enum ApprovalModeCliArg { /// will escalate to the user to ask for un-sandboxed execution. OnFailure, + /// The model decides when to ask the user for approval. + OnRequest, + /// Never ask for user approval /// Execution failures are immediately returned to the model. Never, @@ -28,6 +31,7 @@ impl From for AskForApproval { match value { ApprovalModeCliArg::Untrusted => AskForApproval::UnlessTrusted, ApprovalModeCliArg::OnFailure => AskForApproval::OnFailure, + ApprovalModeCliArg::OnRequest => AskForApproval::OnRequest, ApprovalModeCliArg::Never => AskForApproval::Never, } } diff --git a/codex-rs/config.md b/codex-rs/config.md index 992fe1aa..f93a35eb 100644 --- a/codex-rs/config.md +++ b/codex-rs/config.md @@ -148,12 +148,20 @@ Determines when the user should be prompted to approve whether Codex can execute approval_policy = "untrusted" ``` +If you want to be notified whenever a command fails, use "on-failure": ```toml # If the command fails when run in the sandbox, Codex asks for permission to # retry the command outside the sandbox. approval_policy = "on-failure" ``` +If you want the model to run until it decides that it needs to ask you for escalated permissions, use "on-request": +```toml +# The model decides when to escalate +approval_policy = "on-request" +``` + +Alternatively, you can have the model run until it is done, and never ask to run a command with escalated permissions: ```toml # User is never prompted: if the command fails, Codex will automatically try # something out. Note the `exec` subcommand always uses this mode. diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 9c3a25c2..a7ab664e 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -813,7 +813,12 @@ async fn submission_loop( let default_shell = shell::default_user_shell().await; sess = Some(Arc::new(Session { client, - tools_config: ToolsConfig::new(&config.model_family, config.include_plan_tool), + tools_config: ToolsConfig::new( + &config.model_family, + approval_policy, + sandbox_policy.clone(), + config.include_plan_tool, + ), tx_event: tx_event.clone(), ctrl_c: Arc::clone(&ctrl_c), user_instructions, @@ -1588,6 +1593,8 @@ async fn handle_response_item( command: action.command, workdir: action.working_directory, timeout_ms: action.timeout_ms, + with_escalated_permissions: None, + justification: None, }; let effective_call_id = match (call_id, id) { (Some(call_id), _) => call_id, @@ -1676,6 +1683,8 @@ fn to_exec_params(params: ShellToolCallParams, sess: &Session) -> ExecParams { cwd: sess.resolve_path(params.workdir.clone()), timeout_ms: params.timeout_ms, env: create_env(&sess.shell_environment_policy), + with_escalated_permissions: params.with_escalated_permissions, + justification: params.justification, } } @@ -1776,13 +1785,19 @@ async fn handle_container_exec_with_params( cwd: cwd.clone(), timeout_ms: params.timeout_ms, env: HashMap::new(), + with_escalated_permissions: params.with_escalated_permissions, + justification: params.justification.clone(), }; let safety = if *user_explicitly_approved_this_action { SafetyCheck::AutoApprove { sandbox_type: SandboxType::None, } } else { - assess_safety_for_untrusted_command(sess.approval_policy, &sess.sandbox_policy) + assess_safety_for_untrusted_command( + sess.approval_policy, + &sess.sandbox_policy, + params.with_escalated_permissions.unwrap_or(false), + ) }; ( params, @@ -1798,6 +1813,7 @@ async fn handle_container_exec_with_params( sess.approval_policy, &sess.sandbox_policy, &state.approved_commands, + params.with_escalated_permissions.unwrap_or(false), ) }; let command_for_display = params.command.clone(); @@ -1814,7 +1830,7 @@ async fn handle_container_exec_with_params( call_id.clone(), params.command.clone(), params.cwd.clone(), - None, + params.justification.clone(), ) .await; match rx_approve.await.unwrap_or_default() { @@ -1952,17 +1968,21 @@ async fn handle_sandbox_error( let cwd = exec_command_context.cwd.clone(); let is_apply_patch = exec_command_context.apply_patch.is_some(); - // Early out if the user never wants to be asked for approval; just return to the model immediately - if sess.approval_policy == AskForApproval::Never { - return ResponseInputItem::FunctionCallOutput { - call_id, - output: FunctionCallOutputPayload { - content: format!( - "failed in sandbox {sandbox_type:?} with execution error: {error}" - ), - success: Some(false), - }, - }; + // Early out if either the user never wants to be asked for approval, or + // we're letting the model manage escalation requests. Otherwise, continue + match sess.approval_policy { + AskForApproval::Never | AskForApproval::OnRequest => { + return ResponseInputItem::FunctionCallOutput { + call_id, + output: FunctionCallOutputPayload { + content: format!( + "failed in sandbox {sandbox_type:?} with execution error: {error}" + ), + success: Some(false), + }, + }; + } + AskForApproval::UnlessTrusted | AskForApproval::OnFailure => (), } // similarly, if the command timed out, we can simply return this failure to the model diff --git a/codex-rs/core/src/exec.rs b/codex-rs/core/src/exec.rs index e98aeeae..10606b68 100644 --- a/codex-rs/core/src/exec.rs +++ b/codex-rs/core/src/exec.rs @@ -49,6 +49,8 @@ pub struct ExecParams { pub cwd: PathBuf, pub timeout_ms: Option, pub env: HashMap, + pub with_escalated_permissions: Option, + pub justification: Option, } impl ExecParams { diff --git a/codex-rs/core/src/models.rs b/codex-rs/core/src/models.rs index fb48b530..e052bc43 100644 --- a/codex-rs/core/src/models.rs +++ b/codex-rs/core/src/models.rs @@ -191,6 +191,10 @@ pub struct ShellToolCallParams { // The wire format uses `timeout`, which has ambiguous units, so we use // `timeout_ms` as the field name so it is clear in code. pub timeout_ms: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub with_escalated_permissions: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub justification: Option, } #[derive(Debug, Clone, PartialEq)] @@ -302,6 +306,8 @@ mod tests { command: vec!["ls".to_string(), "-l".to_string()], workdir: Some("/tmp".to_string()), timeout_ms: Some(1000), + with_escalated_permissions: None, + justification: None, }, params ); diff --git a/codex-rs/core/src/openai_tools.rs b/codex-rs/core/src/openai_tools.rs index 1dac7081..1c92c07c 100644 --- a/codex-rs/core/src/openai_tools.rs +++ b/codex-rs/core/src/openai_tools.rs @@ -6,6 +6,8 @@ use std::collections::HashMap; use crate::model_family::ModelFamily; use crate::plan_tool::PLAN_TOOL; +use crate::protocol::AskForApproval; +use crate::protocol::SandboxPolicy; #[derive(Debug, Clone, Serialize, PartialEq)] pub struct ResponsesApiTool { @@ -32,6 +34,7 @@ pub(crate) enum OpenAiTool { #[derive(Debug, Clone)] pub enum ConfigShellToolType { DefaultShell, + ShellWithRequest { sandbox_policy: SandboxPolicy }, LocalShell, } @@ -42,12 +45,22 @@ pub struct ToolsConfig { } impl ToolsConfig { - pub fn new(model_family: &ModelFamily, include_plan_tool: bool) -> Self { - let shell_type = if model_family.uses_local_shell_tool { + pub fn new( + model_family: &ModelFamily, + approval_policy: AskForApproval, + sandbox_policy: SandboxPolicy, + include_plan_tool: bool, + ) -> Self { + let mut shell_type = if model_family.uses_local_shell_tool { ConfigShellToolType::LocalShell } else { ConfigShellToolType::DefaultShell }; + if matches!(approval_policy, AskForApproval::OnRequest) { + shell_type = ConfigShellToolType::ShellWithRequest { + sandbox_policy: sandbox_policy.clone(), + } + } Self { shell_type, @@ -60,10 +73,23 @@ impl ToolsConfig { #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] #[serde(tag = "type", rename_all = "lowercase")] pub(crate) enum JsonSchema { - String, - Number, + Boolean { + #[serde(skip_serializing_if = "Option::is_none")] + description: Option, + }, + String { + #[serde(skip_serializing_if = "Option::is_none")] + description: Option, + }, + Number { + #[serde(skip_serializing_if = "Option::is_none")] + description: Option, + }, Array { items: Box, + + #[serde(skip_serializing_if = "Option::is_none")] + description: Option, }, Object { properties: BTreeMap, @@ -77,16 +103,23 @@ pub(crate) enum JsonSchema { }, } -pub(crate) fn create_shell_tool() -> OpenAiTool { +fn create_shell_tool() -> OpenAiTool { let mut properties = BTreeMap::new(); properties.insert( "command".to_string(), JsonSchema::Array { - items: Box::new(JsonSchema::String), + items: Box::new(JsonSchema::String { description: None }), + description: None, }, ); - properties.insert("workdir".to_string(), JsonSchema::String); - properties.insert("timeout".to_string(), JsonSchema::Number); + properties.insert( + "workdir".to_string(), + JsonSchema::String { description: None }, + ); + properties.insert( + "timeout".to_string(), + JsonSchema::Number { description: None }, + ); OpenAiTool::Function(ResponsesApiTool { name: "shell".to_string(), @@ -100,6 +133,105 @@ pub(crate) fn create_shell_tool() -> OpenAiTool { }) } +fn create_shell_tool_for_sandbox(sandbox_policy: &SandboxPolicy) -> OpenAiTool { + let mut properties = BTreeMap::new(); + properties.insert( + "command".to_string(), + JsonSchema::Array { + items: Box::new(JsonSchema::String { description: None }), + description: Some("The command to execute".to_string()), + }, + ); + properties.insert( + "workdir".to_string(), + JsonSchema::String { + description: Some("The working directory to execute the command in".to_string()), + }, + ); + properties.insert( + "timeout".to_string(), + JsonSchema::Number { + description: Some("The timeout for the command in milliseconds".to_string()), + }, + ); + + if matches!(sandbox_policy, SandboxPolicy::WorkspaceWrite { .. }) { + properties.insert( + "with_escalated_permissions".to_string(), + JsonSchema::Boolean { + description: Some("Whether to request escalated permissions. Set to true if command needs to be run without sandbox restrictions".to_string()), + }, + ); + properties.insert( + "justification".to_string(), + JsonSchema::String { + description: Some("Only set if ask_for_escalated_permissions is true. 1-sentence explanation of why we want to run this command.".to_string()), + }, + ); + } + + let description = match sandbox_policy { + SandboxPolicy::WorkspaceWrite { + network_access, + .. + } => { + format!( + r#" +The shell tool is used to execute shell commands. +- When invoking the shell tool, your call will be running in a landlock sandbox, and some shell commands will require escalated privileges: + - Types of actions that require escalated privileges: + - Reading files outside the current directory + - Writing files outside the current directory, and protected folders like .git or .env{} + - Examples of commands that require escalated privileges: + - git commit + - npm install or pnpm install + - cargo build + - cargo test +- When invoking a command that will require escalated privileges: + - Provide the with_escalated_permissions parameter with the boolean value true + - Include a short, 1 sentence explanation for why we need to run with_escalated_permissions in the justification parameter."#, + if !network_access { + "\n - Commands that require network access\n" + } else { + "" + } + ) + } + SandboxPolicy::DangerFullAccess => { + "Runs a shell command and returns its output.".to_string() + } + SandboxPolicy::ReadOnly => { + r#" +The shell tool is used to execute shell commands. +- When invoking the shell tool, your call will be running in a landlock sandbox, and some shell commands (including apply_patch) will require escalated permissions: + - Types of actions that require escalated privileges: + - Reading files outside the current directory + - Writing files + - Applying patches + - Examples of commands that require escalated privileges: + - apply_patch + - git commit + - npm install or pnpm install + - cargo build + - cargo test +- When invoking a command that will require escalated privileges: + - Provide the with_escalated_permissions parameter with the boolean value true + - Include a short, 1 sentence explanation for why we need to run with_escalated_permissions in the justification parameter"#.to_string() + } + }; + + OpenAiTool::Function(ResponsesApiTool { + name: "shell".to_string(), + description, + strict: false, + parameters: JsonSchema::Object { + properties, + required: Some(vec!["command".to_string()]), + additional_properties: Some(false), + }, + }) +} + /// Returns JSON values that are compatible with Function Calling in the /// Responses API: /// https://platform.openai.com/docs/guides/function-calling?api-mode=responses @@ -184,10 +316,13 @@ pub(crate) fn get_openai_tools( ) -> Vec { let mut tools: Vec = Vec::new(); - match config.shell_type { + match &config.shell_type { ConfigShellToolType::DefaultShell => { tools.push(create_shell_tool()); } + ConfigShellToolType::ShellWithRequest { sandbox_policy } => { + tools.push(create_shell_tool_for_sandbox(sandbox_policy)); + } ConfigShellToolType::LocalShell => { tools.push(OpenAiTool::LocalShell {}); } @@ -245,7 +380,12 @@ mod tests { fn test_get_openai_tools() { let model_family = find_family_for_model("codex-mini-latest") .expect("codex-mini-latest should be a valid model family"); - let config = ToolsConfig::new(&model_family, true); + let config = ToolsConfig::new( + &model_family, + AskForApproval::Never, + SandboxPolicy::ReadOnly, + true, + ); let tools = get_openai_tools(&config, Some(HashMap::new())); assert_eq_tool_names(&tools, &["local_shell", "update_plan"]); @@ -254,7 +394,12 @@ mod tests { #[test] fn test_get_openai_tools_default_shell() { let model_family = find_family_for_model("o3").expect("o3 should be a valid model family"); - let config = ToolsConfig::new(&model_family, true); + let config = ToolsConfig::new( + &model_family, + AskForApproval::Never, + SandboxPolicy::ReadOnly, + true, + ); let tools = get_openai_tools(&config, Some(HashMap::new())); assert_eq_tool_names(&tools, &["shell", "update_plan"]); @@ -263,7 +408,12 @@ mod tests { #[test] fn test_get_openai_tools_mcp_tools() { let model_family = find_family_for_model("o3").expect("o3 should be a valid model family"); - let config = ToolsConfig::new(&model_family, false); + let config = ToolsConfig::new( + &model_family, + AskForApproval::Never, + SandboxPolicy::ReadOnly, + false, + ); let tools = get_openai_tools( &config, Some(HashMap::from([( @@ -310,14 +460,26 @@ mod tests { name: "test_server/do_something_cool".to_string(), parameters: JsonSchema::Object { properties: BTreeMap::from([ - ("string_argument".to_string(), JsonSchema::String), - ("number_argument".to_string(), JsonSchema::Number), + ( + "string_argument".to_string(), + JsonSchema::String { description: None } + ), + ( + "number_argument".to_string(), + JsonSchema::Number { description: None } + ), ( "object_argument".to_string(), JsonSchema::Object { properties: BTreeMap::from([ - ("string_property".to_string(), JsonSchema::String), - ("number_property".to_string(), JsonSchema::Number), + ( + "string_property".to_string(), + JsonSchema::String { description: None } + ), + ( + "number_property".to_string(), + JsonSchema::Number { description: None } + ), ]), required: Some(vec![ "string_property".to_string(), diff --git a/codex-rs/core/src/plan_tool.rs b/codex-rs/core/src/plan_tool.rs index cfc26a40..bba53632 100644 --- a/codex-rs/core/src/plan_tool.rs +++ b/codex-rs/core/src/plan_tool.rs @@ -39,10 +39,14 @@ pub struct UpdatePlanArgs { pub(crate) static PLAN_TOOL: LazyLock = LazyLock::new(|| { let mut plan_item_props = BTreeMap::new(); - plan_item_props.insert("step".to_string(), JsonSchema::String); - plan_item_props.insert("status".to_string(), JsonSchema::String); + plan_item_props.insert("step".to_string(), JsonSchema::String { description: None }); + plan_item_props.insert( + "status".to_string(), + JsonSchema::String { description: None }, + ); let plan_items_schema = JsonSchema::Array { + description: Some("The list of steps".to_string()), items: Box::new(JsonSchema::Object { properties: plan_item_props, required: Some(vec!["step".to_string(), "status".to_string()]), @@ -51,7 +55,10 @@ pub(crate) static PLAN_TOOL: LazyLock = LazyLock::new(|| { }; let mut properties = BTreeMap::new(); - properties.insert("explanation".to_string(), JsonSchema::String); + properties.insert( + "explanation".to_string(), + JsonSchema::String { description: None }, + ); properties.insert("plan".to_string(), plan_items_schema); OpenAiTool::Function(ResponsesApiTool { diff --git a/codex-rs/core/src/protocol.rs b/codex-rs/core/src/protocol.rs index aa330f6b..9bf85ec4 100644 --- a/codex-rs/core/src/protocol.rs +++ b/codex-rs/core/src/protocol.rs @@ -150,6 +150,9 @@ pub enum AskForApproval { /// the user to approve execution without a sandbox. OnFailure, + /// The model decides when to ask the user for approval. + OnRequest, + /// Never ask the user to approve commands. Failures are immediately returned /// to the model, and never escalated to the user for approval. Never, diff --git a/codex-rs/core/src/safety.rs b/codex-rs/core/src/safety.rs index 224705f8..860a728d 100644 --- a/codex-rs/core/src/safety.rs +++ b/codex-rs/core/src/safety.rs @@ -11,7 +11,7 @@ use crate::is_safe_command::is_known_safe_command; use crate::protocol::AskForApproval; use crate::protocol::SandboxPolicy; -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub enum SafetyCheck { AutoApprove { sandbox_type: SandboxType }, AskUser, @@ -31,7 +31,7 @@ pub fn assess_patch_safety( } match policy { - AskForApproval::OnFailure | AskForApproval::Never => { + AskForApproval::OnFailure | AskForApproval::Never | AskForApproval::OnRequest => { // Continue to see if this can be auto-approved. } // TODO(ragona): I'm not sure this is actually correct? I believe in this case @@ -76,6 +76,7 @@ pub fn assess_command_safety( approval_policy: AskForApproval, sandbox_policy: &SandboxPolicy, approved: &HashSet>, + with_escalated_permissions: bool, ) -> SafetyCheck { // A command is "trusted" because either: // - it belongs to a set of commands we consider "safe" by default, or @@ -96,12 +97,13 @@ pub fn assess_command_safety( }; } - assess_safety_for_untrusted_command(approval_policy, sandbox_policy) + assess_safety_for_untrusted_command(approval_policy, sandbox_policy, with_escalated_permissions) } pub(crate) fn assess_safety_for_untrusted_command( approval_policy: AskForApproval, sandbox_policy: &SandboxPolicy, + with_escalated_permissions: bool, ) -> SafetyCheck { use AskForApproval::*; use SandboxPolicy::*; @@ -113,9 +115,23 @@ pub(crate) fn assess_safety_for_untrusted_command( // commands. SafetyCheck::AskUser } - (OnFailure, DangerFullAccess) | (Never, DangerFullAccess) => SafetyCheck::AutoApprove { + (OnFailure, DangerFullAccess) + | (Never, DangerFullAccess) + | (OnRequest, DangerFullAccess) => SafetyCheck::AutoApprove { sandbox_type: SandboxType::None, }, + (OnRequest, ReadOnly) | (OnRequest, WorkspaceWrite { .. }) => { + if with_escalated_permissions { + SafetyCheck::AskUser + } else { + match get_platform_sandbox() { + Some(sandbox_type) => SafetyCheck::AutoApprove { sandbox_type }, + // Fall back to asking since the command is untrusted and + // we do not have a sandbox available + None => SafetyCheck::AskUser, + } + } + } (Never, ReadOnly) | (Never, WorkspaceWrite { .. }) | (OnFailure, ReadOnly) @@ -264,4 +280,47 @@ mod tests { &cwd, )) } + + #[test] + fn test_request_escalated_privileges() { + // Should not be a trusted command + let command = vec!["git commit".to_string()]; + let approval_policy = AskForApproval::OnRequest; + let sandbox_policy = SandboxPolicy::ReadOnly; + let approved: HashSet> = HashSet::new(); + let request_escalated_privileges = true; + + let safety_check = assess_command_safety( + &command, + approval_policy, + &sandbox_policy, + &approved, + request_escalated_privileges, + ); + + assert_eq!(safety_check, SafetyCheck::AskUser); + } + + #[test] + fn test_request_escalated_privileges_no_sandbox_fallback() { + let command = vec!["git".to_string(), "commit".to_string()]; + let approval_policy = AskForApproval::OnRequest; + let sandbox_policy = SandboxPolicy::ReadOnly; + let approved: HashSet> = HashSet::new(); + let request_escalated_privileges = false; + + let safety_check = assess_command_safety( + &command, + approval_policy, + &sandbox_policy, + &approved, + request_escalated_privileges, + ); + + let expected = match get_platform_sandbox() { + Some(sandbox_type) => SafetyCheck::AutoApprove { sandbox_type }, + None => SafetyCheck::AskUser, + }; + assert_eq!(safety_check, expected); + } } diff --git a/codex-rs/core/src/shell.rs b/codex-rs/core/src/shell.rs index 1e895a37..de0764f7 100644 --- a/codex-rs/core/src/shell.rs +++ b/codex-rs/core/src/shell.rs @@ -215,6 +215,8 @@ mod tests { "HOME".to_string(), temp_home.path().to_str().unwrap().to_string(), )]), + with_escalated_permissions: None, + justification: None, }, SandboxType::None, Arc::new(Notify::new()), diff --git a/codex-rs/core/tests/exec.rs b/codex-rs/core/tests/exec.rs index da169296..f1b9e78e 100644 --- a/codex-rs/core/tests/exec.rs +++ b/codex-rs/core/tests/exec.rs @@ -28,6 +28,8 @@ async fn run_test_cmd(tmp: TempDir, cmd: Vec<&str>, should_be_ok: bool) { cwd: tmp.path().to_path_buf(), timeout_ms: Some(1000), env: HashMap::new(), + with_escalated_permissions: None, + justification: None, }; let ctrl_c = Arc::new(Notify::new()); diff --git a/codex-rs/core/tests/exec_stream_events.rs b/codex-rs/core/tests/exec_stream_events.rs index 50f6888f..534b2551 100644 --- a/codex-rs/core/tests/exec_stream_events.rs +++ b/codex-rs/core/tests/exec_stream_events.rs @@ -53,6 +53,8 @@ async fn test_exec_stdout_stream_events_echo() { cwd: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")), timeout_ms: Some(5_000), env: HashMap::new(), + with_escalated_permissions: None, + justification: None, }; let ctrl_c = Arc::new(Notify::new()); @@ -103,6 +105,8 @@ async fn test_exec_stderr_stream_events_echo() { cwd: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")), timeout_ms: Some(5_000), env: HashMap::new(), + with_escalated_permissions: None, + justification: None, }; let ctrl_c = Arc::new(Notify::new()); diff --git a/codex-rs/linux-sandbox/tests/landlock.rs b/codex-rs/linux-sandbox/tests/landlock.rs index 1375a4c6..041e64e2 100644 --- a/codex-rs/linux-sandbox/tests/landlock.rs +++ b/codex-rs/linux-sandbox/tests/landlock.rs @@ -44,6 +44,8 @@ async fn run_cmd(cmd: &[&str], writable_roots: &[PathBuf], timeout_ms: u64) { cwd: std::env::current_dir().expect("cwd should exist"), timeout_ms: Some(timeout_ms), env: create_env_from_core_vars(), + with_escalated_permissions: None, + justification: None, }; let sandbox_policy = SandboxPolicy::WorkspaceWrite { @@ -139,6 +141,8 @@ async fn assert_network_blocked(cmd: &[&str]) { // do not stall the suite. timeout_ms: Some(NETWORK_TIMEOUT_MS), env: create_env_from_core_vars(), + with_escalated_permissions: None, + justification: None, }; let sandbox_policy = SandboxPolicy::new_read_only_policy();