Use a unified shell tell to not break cache (#3814)
Currently, we change the tool description according to the sandbox policy and approval policy. This breaks the cache when the user hits `/approvals`. This PR does the following: - Always use the shell with escalation parameter: - removes `create_shell_tool_for_sandbox` and always uses unified tool via `create_shell_tool` - Reject the func call when the model uses escalation parameter when it cannot.
This commit is contained in:
@@ -7,8 +7,6 @@ use std::collections::HashMap;
|
||||
|
||||
use crate::model_family::ModelFamily;
|
||||
use crate::plan_tool::PLAN_TOOL;
|
||||
use crate::protocol::AskForApproval;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use crate::tool_apply_patch::ApplyPatchToolType;
|
||||
use crate::tool_apply_patch::create_apply_patch_freeform_tool;
|
||||
use crate::tool_apply_patch::create_apply_patch_json_tool;
|
||||
@@ -57,10 +55,9 @@ pub(crate) enum OpenAiTool {
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum ConfigShellToolType {
|
||||
DefaultShell,
|
||||
ShellWithRequest { sandbox_policy: SandboxPolicy },
|
||||
LocalShell,
|
||||
StreamableShell,
|
||||
Default,
|
||||
Local,
|
||||
Streamable,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
@@ -75,8 +72,6 @@ pub(crate) struct ToolsConfig {
|
||||
|
||||
pub(crate) struct ToolsConfigParams<'a> {
|
||||
pub(crate) model_family: &'a ModelFamily,
|
||||
pub(crate) approval_policy: AskForApproval,
|
||||
pub(crate) sandbox_policy: SandboxPolicy,
|
||||
pub(crate) include_plan_tool: bool,
|
||||
pub(crate) include_apply_patch_tool: bool,
|
||||
pub(crate) include_web_search_request: bool,
|
||||
@@ -89,8 +84,6 @@ impl ToolsConfig {
|
||||
pub fn new(params: &ToolsConfigParams) -> Self {
|
||||
let ToolsConfigParams {
|
||||
model_family,
|
||||
approval_policy,
|
||||
sandbox_policy,
|
||||
include_plan_tool,
|
||||
include_apply_patch_tool,
|
||||
include_web_search_request,
|
||||
@@ -98,18 +91,13 @@ impl ToolsConfig {
|
||||
include_view_image_tool,
|
||||
experimental_unified_exec_tool,
|
||||
} = params;
|
||||
let mut shell_type = if *use_streamable_shell_tool {
|
||||
ConfigShellToolType::StreamableShell
|
||||
let shell_type = if *use_streamable_shell_tool {
|
||||
ConfigShellToolType::Streamable
|
||||
} else if model_family.uses_local_shell_tool {
|
||||
ConfigShellToolType::LocalShell
|
||||
ConfigShellToolType::Local
|
||||
} else {
|
||||
ConfigShellToolType::DefaultShell
|
||||
ConfigShellToolType::Default
|
||||
};
|
||||
if matches!(approval_policy, AskForApproval::OnRequest) && !use_streamable_shell_tool {
|
||||
shell_type = ConfigShellToolType::ShellWithRequest {
|
||||
sandbox_policy: sandbox_policy.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
let apply_patch_tool_type = match model_family.apply_patch_tool_type {
|
||||
Some(ApplyPatchToolType::Freeform) => Some(ApplyPatchToolType::Freeform),
|
||||
@@ -170,40 +158,6 @@ pub(crate) enum JsonSchema {
|
||||
},
|
||||
}
|
||||
|
||||
fn create_shell_tool() -> OpenAiTool {
|
||||
let mut properties = BTreeMap::new();
|
||||
properties.insert(
|
||||
"command".to_string(),
|
||||
JsonSchema::Array {
|
||||
items: Box::new(JsonSchema::String { description: None }),
|
||||
description: Some("The command to execute".to_string()),
|
||||
},
|
||||
);
|
||||
properties.insert(
|
||||
"workdir".to_string(),
|
||||
JsonSchema::String {
|
||||
description: Some("The working directory to execute the command in".to_string()),
|
||||
},
|
||||
);
|
||||
properties.insert(
|
||||
"timeout_ms".to_string(),
|
||||
JsonSchema::Number {
|
||||
description: Some("The timeout for the command in milliseconds".to_string()),
|
||||
},
|
||||
);
|
||||
|
||||
OpenAiTool::Function(ResponsesApiTool {
|
||||
name: "shell".to_string(),
|
||||
description: "Runs a shell command and returns its output".to_string(),
|
||||
strict: false,
|
||||
parameters: JsonSchema::Object {
|
||||
properties,
|
||||
required: Some(vec!["command".to_string()]),
|
||||
additional_properties: Some(false),
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
fn create_unified_exec_tool() -> OpenAiTool {
|
||||
let mut properties = BTreeMap::new();
|
||||
properties.insert(
|
||||
@@ -251,7 +205,7 @@ fn create_unified_exec_tool() -> OpenAiTool {
|
||||
})
|
||||
}
|
||||
|
||||
fn create_shell_tool_for_sandbox(sandbox_policy: &SandboxPolicy) -> OpenAiTool {
|
||||
fn create_shell_tool() -> OpenAiTool {
|
||||
let mut properties = BTreeMap::new();
|
||||
properties.insert(
|
||||
"command".to_string(),
|
||||
@@ -273,20 +227,18 @@ fn create_shell_tool_for_sandbox(sandbox_policy: &SandboxPolicy) -> OpenAiTool {
|
||||
},
|
||||
);
|
||||
|
||||
if !matches!(sandbox_policy, SandboxPolicy::DangerFullAccess) {
|
||||
properties.insert(
|
||||
properties.insert(
|
||||
"with_escalated_permissions".to_string(),
|
||||
JsonSchema::Boolean {
|
||||
description: Some("Whether to request escalated permissions. Set to true if command needs to be run without sandbox restrictions".to_string()),
|
||||
},
|
||||
);
|
||||
properties.insert(
|
||||
properties.insert(
|
||||
"justification".to_string(),
|
||||
JsonSchema::String {
|
||||
description: Some("Only set if with_escalated_permissions is true. 1-sentence explanation of why we want to run this command.".to_string()),
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
OpenAiTool::Function(ResponsesApiTool {
|
||||
name: "shell".to_string(),
|
||||
@@ -537,16 +489,13 @@ pub(crate) fn get_openai_tools(
|
||||
tools.push(create_unified_exec_tool());
|
||||
} else {
|
||||
match &config.shell_type {
|
||||
ConfigShellToolType::DefaultShell => {
|
||||
ConfigShellToolType::Default => {
|
||||
tools.push(create_shell_tool());
|
||||
}
|
||||
ConfigShellToolType::ShellWithRequest { sandbox_policy } => {
|
||||
tools.push(create_shell_tool_for_sandbox(sandbox_policy));
|
||||
}
|
||||
ConfigShellToolType::LocalShell => {
|
||||
ConfigShellToolType::Local => {
|
||||
tools.push(OpenAiTool::LocalShell {});
|
||||
}
|
||||
ConfigShellToolType::StreamableShell => {
|
||||
ConfigShellToolType::Streamable => {
|
||||
tools.push(OpenAiTool::Function(
|
||||
crate::exec_command::create_exec_command_tool_for_responses_api(),
|
||||
));
|
||||
@@ -636,8 +585,6 @@ mod tests {
|
||||
.expect("codex-mini-latest should be a valid model family");
|
||||
let config = ToolsConfig::new(&ToolsConfigParams {
|
||||
model_family: &model_family,
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
||||
include_plan_tool: true,
|
||||
include_apply_patch_tool: false,
|
||||
include_web_search_request: true,
|
||||
@@ -658,8 +605,6 @@ mod tests {
|
||||
let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
|
||||
let config = ToolsConfig::new(&ToolsConfigParams {
|
||||
model_family: &model_family,
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
||||
include_plan_tool: true,
|
||||
include_apply_patch_tool: false,
|
||||
include_web_search_request: true,
|
||||
@@ -680,8 +625,6 @@ mod tests {
|
||||
let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
|
||||
let config = ToolsConfig::new(&ToolsConfigParams {
|
||||
model_family: &model_family,
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
||||
include_plan_tool: false,
|
||||
include_apply_patch_tool: false,
|
||||
include_web_search_request: true,
|
||||
@@ -786,8 +729,6 @@ mod tests {
|
||||
let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
|
||||
let config = ToolsConfig::new(&ToolsConfigParams {
|
||||
model_family: &model_family,
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
||||
include_plan_tool: false,
|
||||
include_apply_patch_tool: false,
|
||||
include_web_search_request: false,
|
||||
@@ -864,8 +805,6 @@ mod tests {
|
||||
let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
|
||||
let config = ToolsConfig::new(&ToolsConfigParams {
|
||||
model_family: &model_family,
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
||||
include_plan_tool: false,
|
||||
include_apply_patch_tool: false,
|
||||
include_web_search_request: true,
|
||||
@@ -927,8 +866,6 @@ mod tests {
|
||||
let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
|
||||
let config = ToolsConfig::new(&ToolsConfigParams {
|
||||
model_family: &model_family,
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
||||
include_plan_tool: false,
|
||||
include_apply_patch_tool: false,
|
||||
include_web_search_request: true,
|
||||
@@ -985,8 +922,6 @@ mod tests {
|
||||
let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
|
||||
let config = ToolsConfig::new(&ToolsConfigParams {
|
||||
model_family: &model_family,
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
||||
include_plan_tool: false,
|
||||
include_apply_patch_tool: false,
|
||||
include_web_search_request: true,
|
||||
@@ -1046,8 +981,6 @@ mod tests {
|
||||
let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
|
||||
let config = ToolsConfig::new(&ToolsConfigParams {
|
||||
model_family: &model_family,
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
||||
include_plan_tool: false,
|
||||
include_apply_patch_tool: false,
|
||||
include_web_search_request: true,
|
||||
@@ -1100,14 +1033,8 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_shell_tool_for_sandbox_workspace_write() {
|
||||
let sandbox_policy = SandboxPolicy::WorkspaceWrite {
|
||||
writable_roots: vec!["workspace".into()],
|
||||
network_access: false,
|
||||
exclude_tmpdir_env_var: false,
|
||||
exclude_slash_tmp: false,
|
||||
};
|
||||
let tool = super::create_shell_tool_for_sandbox(&sandbox_policy);
|
||||
fn test_shell_tool() {
|
||||
let tool = super::create_shell_tool();
|
||||
let OpenAiTool::Function(ResponsesApiTool {
|
||||
description, name, ..
|
||||
}) = &tool
|
||||
@@ -1119,33 +1046,4 @@ mod tests {
|
||||
let expected = "Runs a shell command and returns its output.";
|
||||
assert_eq!(description, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_shell_tool_for_sandbox_readonly() {
|
||||
let tool = super::create_shell_tool_for_sandbox(&SandboxPolicy::ReadOnly);
|
||||
let OpenAiTool::Function(ResponsesApiTool {
|
||||
description, name, ..
|
||||
}) = &tool
|
||||
else {
|
||||
panic!("expected function tool");
|
||||
};
|
||||
assert_eq!(name, "shell");
|
||||
|
||||
let expected = "Runs a shell command and returns its output.";
|
||||
assert_eq!(description, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_shell_tool_for_sandbox_danger_full_access() {
|
||||
let tool = super::create_shell_tool_for_sandbox(&SandboxPolicy::DangerFullAccess);
|
||||
let OpenAiTool::Function(ResponsesApiTool {
|
||||
description, name, ..
|
||||
}) = &tool
|
||||
else {
|
||||
panic!("expected function tool");
|
||||
};
|
||||
assert_eq!(name, "shell");
|
||||
|
||||
assert_eq!(description, "Runs a shell command and returns its output.");
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user