diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index e9f52b1c..8852b258 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -456,8 +456,6 @@ impl Session { client, tools_config: ToolsConfig::new(&ToolsConfigParams { model_family: &config.model_family, - approval_policy, - sandbox_policy: sandbox_policy.clone(), include_plan_tool: config.include_plan_tool, include_apply_patch_tool: config.include_apply_patch_tool, include_web_search_request: config.tools_web_search_request, @@ -1237,8 +1235,6 @@ async fn submission_loop( let tools_config = ToolsConfig::new(&ToolsConfigParams { model_family: &effective_family, - approval_policy: new_approval_policy, - sandbox_policy: new_sandbox_policy.clone(), include_plan_tool: config.include_plan_tool, include_apply_patch_tool: config.include_apply_patch_tool, include_web_search_request: config.tools_web_search_request, @@ -1325,8 +1321,6 @@ async fn submission_loop( client, tools_config: ToolsConfig::new(&ToolsConfigParams { model_family: &model_family, - approval_policy, - sandbox_policy: sandbox_policy.clone(), include_plan_tool: config.include_plan_tool, include_apply_patch_tool: config.include_apply_patch_tool, include_web_search_request: config.tools_web_search_request, @@ -1553,8 +1547,6 @@ async fn spawn_review_thread( .unwrap_or_else(|| parent_turn_context.client.get_model_family()); let tools_config = ToolsConfig::new(&ToolsConfigParams { model_family: &review_model_family, - approval_policy: parent_turn_context.approval_policy, - sandbox_policy: parent_turn_context.sandbox_policy.clone(), include_plan_tool: false, include_apply_patch_tool: config.include_apply_patch_tool, include_web_search_request: false, @@ -2724,6 +2716,21 @@ async fn handle_container_exec_with_params( sub_id: String, call_id: String, ) -> ResponseInputItem { + if params.with_escalated_permissions.unwrap_or(false) + && !matches!(turn_context.approval_policy, AskForApproval::OnRequest) + { + return ResponseInputItem::FunctionCallOutput { + call_id, + output: FunctionCallOutputPayload { + content: format!( + "approval policy is {policy:?}; reject command — you should not ask for escalated permissions if the approval policy is {policy:?}", + policy = turn_context.approval_policy + ), + success: None, + }, + }; + } + // check if this was a patch, and apply it if so let apply_patch_exec = match maybe_parse_apply_patch_verified(¶ms.command, ¶ms.cwd) { MaybeApplyPatchVerified::Body(changes) => { @@ -3345,6 +3352,7 @@ mod tests { use mcp_types::ContentBlock; use mcp_types::TextContent; use pretty_assertions::assert_eq; + use serde::Deserialize; use serde_json::json; use std::path::PathBuf; use std::sync::Arc; @@ -3594,8 +3602,6 @@ mod tests { ); let tools_config = ToolsConfig::new(&ToolsConfigParams { model_family: &config.model_family, - approval_policy: config.approval_policy, - sandbox_policy: config.sandbox_policy.clone(), include_plan_tool: config.include_plan_tool, include_apply_patch_tool: config.include_apply_patch_tool, include_web_search_request: config.tools_web_search_request, @@ -3735,4 +3741,105 @@ mod tests { (rollout_items, live_history.contents()) } + + #[tokio::test] + async fn rejects_escalated_permissions_when_policy_not_on_request() { + use crate::exec::ExecParams; + use crate::protocol::AskForApproval; + use crate::protocol::SandboxPolicy; + use crate::turn_diff_tracker::TurnDiffTracker; + use std::collections::HashMap; + + let (session, mut turn_context) = make_session_and_context(); + // Ensure policy is NOT OnRequest so the early rejection path triggers + turn_context.approval_policy = AskForApproval::OnFailure; + + let params = ExecParams { + command: if cfg!(windows) { + vec![ + "cmd.exe".to_string(), + "/C".to_string(), + "echo hi".to_string(), + ] + } else { + vec![ + "/bin/sh".to_string(), + "-c".to_string(), + "echo hi".to_string(), + ] + }, + cwd: turn_context.cwd.clone(), + timeout_ms: Some(1000), + env: HashMap::new(), + with_escalated_permissions: Some(true), + justification: Some("test".to_string()), + }; + + let params2 = ExecParams { + with_escalated_permissions: Some(false), + ..params.clone() + }; + + let mut turn_diff_tracker = TurnDiffTracker::new(); + + let sub_id = "test-sub".to_string(); + let call_id = "test-call".to_string(); + + let resp = handle_container_exec_with_params( + params, + &session, + &turn_context, + &mut turn_diff_tracker, + sub_id, + call_id, + ) + .await; + + let ResponseInputItem::FunctionCallOutput { output, .. } = resp else { + panic!("expected FunctionCallOutput"); + }; + + let expected = format!( + "approval policy is {policy:?}; reject command — you should not ask for escalated permissions if the approval policy is {policy:?}", + policy = turn_context.approval_policy + ); + + pretty_assertions::assert_eq!(output.content, expected); + + // Now retry the same command WITHOUT escalated permissions; should succeed. + // Force DangerFullAccess to avoid platform sandbox dependencies in tests. + turn_context.sandbox_policy = SandboxPolicy::DangerFullAccess; + + let resp2 = handle_container_exec_with_params( + params2, + &session, + &turn_context, + &mut turn_diff_tracker, + "test-sub".to_string(), + "test-call-2".to_string(), + ) + .await; + + let ResponseInputItem::FunctionCallOutput { output, .. } = resp2 else { + panic!("expected FunctionCallOutput on retry"); + }; + + #[derive(Deserialize, PartialEq, Eq, Debug)] + struct ResponseExecMetadata { + exit_code: i32, + } + + #[derive(Deserialize)] + struct ResponseExecOutput { + output: String, + metadata: ResponseExecMetadata, + } + + let exec_output: ResponseExecOutput = + serde_json::from_str(&output.content).expect("valid exec output json"); + + pretty_assertions::assert_eq!(exec_output.metadata, ResponseExecMetadata { exit_code: 0 }); + assert!(exec_output.output.contains("hi")); + pretty_assertions::assert_eq!(output.success, Some(true)); + } } diff --git a/codex-rs/core/src/exec.rs b/codex-rs/core/src/exec.rs index 9e11604c..d84bbc9f 100644 --- a/codex-rs/core/src/exec.rs +++ b/codex-rs/core/src/exec.rs @@ -45,7 +45,7 @@ const AGGREGATE_BUFFER_INITIAL_CAPACITY: usize = 8 * 1024; // 8 KiB /// Aggregation still collects full output; only the live event stream is capped. pub(crate) const MAX_EXEC_OUTPUT_DELTAS_PER_CALL: usize = 10_000; -#[derive(Debug, Clone)] +#[derive(Clone, Debug)] pub struct ExecParams { pub command: Vec, pub cwd: PathBuf, diff --git a/codex-rs/core/src/openai_tools.rs b/codex-rs/core/src/openai_tools.rs index b57d1b76..05b71ce6 100644 --- a/codex-rs/core/src/openai_tools.rs +++ b/codex-rs/core/src/openai_tools.rs @@ -7,8 +7,6 @@ use std::collections::HashMap; use crate::model_family::ModelFamily; use crate::plan_tool::PLAN_TOOL; -use crate::protocol::AskForApproval; -use crate::protocol::SandboxPolicy; use crate::tool_apply_patch::ApplyPatchToolType; use crate::tool_apply_patch::create_apply_patch_freeform_tool; use crate::tool_apply_patch::create_apply_patch_json_tool; @@ -57,10 +55,9 @@ pub(crate) enum OpenAiTool { #[derive(Debug, Clone)] pub enum ConfigShellToolType { - DefaultShell, - ShellWithRequest { sandbox_policy: SandboxPolicy }, - LocalShell, - StreamableShell, + Default, + Local, + Streamable, } #[derive(Debug, Clone)] @@ -75,8 +72,6 @@ pub(crate) struct ToolsConfig { pub(crate) struct ToolsConfigParams<'a> { pub(crate) model_family: &'a ModelFamily, - pub(crate) approval_policy: AskForApproval, - pub(crate) sandbox_policy: SandboxPolicy, pub(crate) include_plan_tool: bool, pub(crate) include_apply_patch_tool: bool, pub(crate) include_web_search_request: bool, @@ -89,8 +84,6 @@ impl ToolsConfig { pub fn new(params: &ToolsConfigParams) -> Self { let ToolsConfigParams { model_family, - approval_policy, - sandbox_policy, include_plan_tool, include_apply_patch_tool, include_web_search_request, @@ -98,18 +91,13 @@ impl ToolsConfig { include_view_image_tool, experimental_unified_exec_tool, } = params; - let mut shell_type = if *use_streamable_shell_tool { - ConfigShellToolType::StreamableShell + let shell_type = if *use_streamable_shell_tool { + ConfigShellToolType::Streamable } else if model_family.uses_local_shell_tool { - ConfigShellToolType::LocalShell + ConfigShellToolType::Local } else { - ConfigShellToolType::DefaultShell + ConfigShellToolType::Default }; - if matches!(approval_policy, AskForApproval::OnRequest) && !use_streamable_shell_tool { - shell_type = ConfigShellToolType::ShellWithRequest { - sandbox_policy: sandbox_policy.clone(), - } - } let apply_patch_tool_type = match model_family.apply_patch_tool_type { Some(ApplyPatchToolType::Freeform) => Some(ApplyPatchToolType::Freeform), @@ -170,40 +158,6 @@ pub(crate) enum JsonSchema { }, } -fn create_shell_tool() -> OpenAiTool { - let mut properties = BTreeMap::new(); - properties.insert( - "command".to_string(), - JsonSchema::Array { - items: Box::new(JsonSchema::String { description: None }), - description: Some("The command to execute".to_string()), - }, - ); - properties.insert( - "workdir".to_string(), - JsonSchema::String { - description: Some("The working directory to execute the command in".to_string()), - }, - ); - properties.insert( - "timeout_ms".to_string(), - JsonSchema::Number { - description: Some("The timeout for the command in milliseconds".to_string()), - }, - ); - - OpenAiTool::Function(ResponsesApiTool { - name: "shell".to_string(), - description: "Runs a shell command and returns its output".to_string(), - strict: false, - parameters: JsonSchema::Object { - properties, - required: Some(vec!["command".to_string()]), - additional_properties: Some(false), - }, - }) -} - fn create_unified_exec_tool() -> OpenAiTool { let mut properties = BTreeMap::new(); properties.insert( @@ -251,7 +205,7 @@ fn create_unified_exec_tool() -> OpenAiTool { }) } -fn create_shell_tool_for_sandbox(sandbox_policy: &SandboxPolicy) -> OpenAiTool { +fn create_shell_tool() -> OpenAiTool { let mut properties = BTreeMap::new(); properties.insert( "command".to_string(), @@ -273,20 +227,18 @@ fn create_shell_tool_for_sandbox(sandbox_policy: &SandboxPolicy) -> OpenAiTool { }, ); - if !matches!(sandbox_policy, SandboxPolicy::DangerFullAccess) { - properties.insert( + properties.insert( "with_escalated_permissions".to_string(), JsonSchema::Boolean { description: Some("Whether to request escalated permissions. Set to true if command needs to be run without sandbox restrictions".to_string()), }, ); - properties.insert( + properties.insert( "justification".to_string(), JsonSchema::String { description: Some("Only set if with_escalated_permissions is true. 1-sentence explanation of why we want to run this command.".to_string()), }, ); - } OpenAiTool::Function(ResponsesApiTool { name: "shell".to_string(), @@ -537,16 +489,13 @@ pub(crate) fn get_openai_tools( tools.push(create_unified_exec_tool()); } else { match &config.shell_type { - ConfigShellToolType::DefaultShell => { + ConfigShellToolType::Default => { tools.push(create_shell_tool()); } - ConfigShellToolType::ShellWithRequest { sandbox_policy } => { - tools.push(create_shell_tool_for_sandbox(sandbox_policy)); - } - ConfigShellToolType::LocalShell => { + ConfigShellToolType::Local => { tools.push(OpenAiTool::LocalShell {}); } - ConfigShellToolType::StreamableShell => { + ConfigShellToolType::Streamable => { tools.push(OpenAiTool::Function( crate::exec_command::create_exec_command_tool_for_responses_api(), )); @@ -636,8 +585,6 @@ mod tests { .expect("codex-mini-latest should be a valid model family"); let config = ToolsConfig::new(&ToolsConfigParams { model_family: &model_family, - approval_policy: AskForApproval::Never, - sandbox_policy: SandboxPolicy::ReadOnly, include_plan_tool: true, include_apply_patch_tool: false, include_web_search_request: true, @@ -658,8 +605,6 @@ mod tests { let model_family = find_family_for_model("o3").expect("o3 should be a valid model family"); let config = ToolsConfig::new(&ToolsConfigParams { model_family: &model_family, - approval_policy: AskForApproval::Never, - sandbox_policy: SandboxPolicy::ReadOnly, include_plan_tool: true, include_apply_patch_tool: false, include_web_search_request: true, @@ -680,8 +625,6 @@ mod tests { let model_family = find_family_for_model("o3").expect("o3 should be a valid model family"); let config = ToolsConfig::new(&ToolsConfigParams { model_family: &model_family, - approval_policy: AskForApproval::Never, - sandbox_policy: SandboxPolicy::ReadOnly, include_plan_tool: false, include_apply_patch_tool: false, include_web_search_request: true, @@ -786,8 +729,6 @@ mod tests { let model_family = find_family_for_model("o3").expect("o3 should be a valid model family"); let config = ToolsConfig::new(&ToolsConfigParams { model_family: &model_family, - approval_policy: AskForApproval::Never, - sandbox_policy: SandboxPolicy::ReadOnly, include_plan_tool: false, include_apply_patch_tool: false, include_web_search_request: false, @@ -864,8 +805,6 @@ mod tests { let model_family = find_family_for_model("o3").expect("o3 should be a valid model family"); let config = ToolsConfig::new(&ToolsConfigParams { model_family: &model_family, - approval_policy: AskForApproval::Never, - sandbox_policy: SandboxPolicy::ReadOnly, include_plan_tool: false, include_apply_patch_tool: false, include_web_search_request: true, @@ -927,8 +866,6 @@ mod tests { let model_family = find_family_for_model("o3").expect("o3 should be a valid model family"); let config = ToolsConfig::new(&ToolsConfigParams { model_family: &model_family, - approval_policy: AskForApproval::Never, - sandbox_policy: SandboxPolicy::ReadOnly, include_plan_tool: false, include_apply_patch_tool: false, include_web_search_request: true, @@ -985,8 +922,6 @@ mod tests { let model_family = find_family_for_model("o3").expect("o3 should be a valid model family"); let config = ToolsConfig::new(&ToolsConfigParams { model_family: &model_family, - approval_policy: AskForApproval::Never, - sandbox_policy: SandboxPolicy::ReadOnly, include_plan_tool: false, include_apply_patch_tool: false, include_web_search_request: true, @@ -1046,8 +981,6 @@ mod tests { let model_family = find_family_for_model("o3").expect("o3 should be a valid model family"); let config = ToolsConfig::new(&ToolsConfigParams { model_family: &model_family, - approval_policy: AskForApproval::Never, - sandbox_policy: SandboxPolicy::ReadOnly, include_plan_tool: false, include_apply_patch_tool: false, include_web_search_request: true, @@ -1100,14 +1033,8 @@ mod tests { } #[test] - fn test_shell_tool_for_sandbox_workspace_write() { - let sandbox_policy = SandboxPolicy::WorkspaceWrite { - writable_roots: vec!["workspace".into()], - network_access: false, - exclude_tmpdir_env_var: false, - exclude_slash_tmp: false, - }; - let tool = super::create_shell_tool_for_sandbox(&sandbox_policy); + fn test_shell_tool() { + let tool = super::create_shell_tool(); let OpenAiTool::Function(ResponsesApiTool { description, name, .. }) = &tool @@ -1119,33 +1046,4 @@ mod tests { let expected = "Runs a shell command and returns its output."; assert_eq!(description, expected); } - - #[test] - fn test_shell_tool_for_sandbox_readonly() { - let tool = super::create_shell_tool_for_sandbox(&SandboxPolicy::ReadOnly); - let OpenAiTool::Function(ResponsesApiTool { - description, name, .. - }) = &tool - else { - panic!("expected function tool"); - }; - assert_eq!(name, "shell"); - - let expected = "Runs a shell command and returns its output."; - assert_eq!(description, expected); - } - - #[test] - fn test_shell_tool_for_sandbox_danger_full_access() { - let tool = super::create_shell_tool_for_sandbox(&SandboxPolicy::DangerFullAccess); - let OpenAiTool::Function(ResponsesApiTool { - description, name, .. - }) = &tool - else { - panic!("expected function tool"); - }; - assert_eq!(name, "shell"); - - assert_eq!(description, "Runs a shell command and returns its output."); - } }