Use a unified shell tool to avoid breaking the cache (#3814)
Currently, we change the tool description according to the sandbox policy and approval policy. This breaks the cache when the user hits `/approvals`. This PR does the following:
- Always uses the shell tool with the escalation parameters: removes `create_shell_tool_for_sandbox` and always uses the unified tool via `create_shell_tool`.
- Rejects the function call when the model sets the escalation parameter under an approval policy that does not allow it (any policy other than `OnRequest`).
This commit is contained in:
@@ -456,8 +456,6 @@ impl Session {
|
|||||||
client,
|
client,
|
||||||
tools_config: ToolsConfig::new(&ToolsConfigParams {
|
tools_config: ToolsConfig::new(&ToolsConfigParams {
|
||||||
model_family: &config.model_family,
|
model_family: &config.model_family,
|
||||||
approval_policy,
|
|
||||||
sandbox_policy: sandbox_policy.clone(),
|
|
||||||
include_plan_tool: config.include_plan_tool,
|
include_plan_tool: config.include_plan_tool,
|
||||||
include_apply_patch_tool: config.include_apply_patch_tool,
|
include_apply_patch_tool: config.include_apply_patch_tool,
|
||||||
include_web_search_request: config.tools_web_search_request,
|
include_web_search_request: config.tools_web_search_request,
|
||||||
@@ -1237,8 +1235,6 @@ async fn submission_loop(
|
|||||||
|
|
||||||
let tools_config = ToolsConfig::new(&ToolsConfigParams {
|
let tools_config = ToolsConfig::new(&ToolsConfigParams {
|
||||||
model_family: &effective_family,
|
model_family: &effective_family,
|
||||||
approval_policy: new_approval_policy,
|
|
||||||
sandbox_policy: new_sandbox_policy.clone(),
|
|
||||||
include_plan_tool: config.include_plan_tool,
|
include_plan_tool: config.include_plan_tool,
|
||||||
include_apply_patch_tool: config.include_apply_patch_tool,
|
include_apply_patch_tool: config.include_apply_patch_tool,
|
||||||
include_web_search_request: config.tools_web_search_request,
|
include_web_search_request: config.tools_web_search_request,
|
||||||
@@ -1325,8 +1321,6 @@ async fn submission_loop(
|
|||||||
client,
|
client,
|
||||||
tools_config: ToolsConfig::new(&ToolsConfigParams {
|
tools_config: ToolsConfig::new(&ToolsConfigParams {
|
||||||
model_family: &model_family,
|
model_family: &model_family,
|
||||||
approval_policy,
|
|
||||||
sandbox_policy: sandbox_policy.clone(),
|
|
||||||
include_plan_tool: config.include_plan_tool,
|
include_plan_tool: config.include_plan_tool,
|
||||||
include_apply_patch_tool: config.include_apply_patch_tool,
|
include_apply_patch_tool: config.include_apply_patch_tool,
|
||||||
include_web_search_request: config.tools_web_search_request,
|
include_web_search_request: config.tools_web_search_request,
|
||||||
@@ -1553,8 +1547,6 @@ async fn spawn_review_thread(
|
|||||||
.unwrap_or_else(|| parent_turn_context.client.get_model_family());
|
.unwrap_or_else(|| parent_turn_context.client.get_model_family());
|
||||||
let tools_config = ToolsConfig::new(&ToolsConfigParams {
|
let tools_config = ToolsConfig::new(&ToolsConfigParams {
|
||||||
model_family: &review_model_family,
|
model_family: &review_model_family,
|
||||||
approval_policy: parent_turn_context.approval_policy,
|
|
||||||
sandbox_policy: parent_turn_context.sandbox_policy.clone(),
|
|
||||||
include_plan_tool: false,
|
include_plan_tool: false,
|
||||||
include_apply_patch_tool: config.include_apply_patch_tool,
|
include_apply_patch_tool: config.include_apply_patch_tool,
|
||||||
include_web_search_request: false,
|
include_web_search_request: false,
|
||||||
@@ -2724,6 +2716,21 @@ async fn handle_container_exec_with_params(
|
|||||||
sub_id: String,
|
sub_id: String,
|
||||||
call_id: String,
|
call_id: String,
|
||||||
) -> ResponseInputItem {
|
) -> ResponseInputItem {
|
||||||
|
if params.with_escalated_permissions.unwrap_or(false)
|
||||||
|
&& !matches!(turn_context.approval_policy, AskForApproval::OnRequest)
|
||||||
|
{
|
||||||
|
return ResponseInputItem::FunctionCallOutput {
|
||||||
|
call_id,
|
||||||
|
output: FunctionCallOutputPayload {
|
||||||
|
content: format!(
|
||||||
|
"approval policy is {policy:?}; reject command — you should not ask for escalated permissions if the approval policy is {policy:?}",
|
||||||
|
policy = turn_context.approval_policy
|
||||||
|
),
|
||||||
|
success: None,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
// check if this was a patch, and apply it if so
|
// check if this was a patch, and apply it if so
|
||||||
let apply_patch_exec = match maybe_parse_apply_patch_verified(¶ms.command, ¶ms.cwd) {
|
let apply_patch_exec = match maybe_parse_apply_patch_verified(¶ms.command, ¶ms.cwd) {
|
||||||
MaybeApplyPatchVerified::Body(changes) => {
|
MaybeApplyPatchVerified::Body(changes) => {
|
||||||
@@ -3345,6 +3352,7 @@ mod tests {
|
|||||||
use mcp_types::ContentBlock;
|
use mcp_types::ContentBlock;
|
||||||
use mcp_types::TextContent;
|
use mcp_types::TextContent;
|
||||||
use pretty_assertions::assert_eq;
|
use pretty_assertions::assert_eq;
|
||||||
|
use serde::Deserialize;
|
||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
@@ -3594,8 +3602,6 @@ mod tests {
|
|||||||
);
|
);
|
||||||
let tools_config = ToolsConfig::new(&ToolsConfigParams {
|
let tools_config = ToolsConfig::new(&ToolsConfigParams {
|
||||||
model_family: &config.model_family,
|
model_family: &config.model_family,
|
||||||
approval_policy: config.approval_policy,
|
|
||||||
sandbox_policy: config.sandbox_policy.clone(),
|
|
||||||
include_plan_tool: config.include_plan_tool,
|
include_plan_tool: config.include_plan_tool,
|
||||||
include_apply_patch_tool: config.include_apply_patch_tool,
|
include_apply_patch_tool: config.include_apply_patch_tool,
|
||||||
include_web_search_request: config.tools_web_search_request,
|
include_web_search_request: config.tools_web_search_request,
|
||||||
@@ -3735,4 +3741,105 @@ mod tests {
|
|||||||
|
|
||||||
(rollout_items, live_history.contents())
|
(rollout_items, live_history.contents())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn rejects_escalated_permissions_when_policy_not_on_request() {
|
||||||
|
use crate::exec::ExecParams;
|
||||||
|
use crate::protocol::AskForApproval;
|
||||||
|
use crate::protocol::SandboxPolicy;
|
||||||
|
use crate::turn_diff_tracker::TurnDiffTracker;
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
let (session, mut turn_context) = make_session_and_context();
|
||||||
|
// Ensure policy is NOT OnRequest so the early rejection path triggers
|
||||||
|
turn_context.approval_policy = AskForApproval::OnFailure;
|
||||||
|
|
||||||
|
let params = ExecParams {
|
||||||
|
command: if cfg!(windows) {
|
||||||
|
vec![
|
||||||
|
"cmd.exe".to_string(),
|
||||||
|
"/C".to_string(),
|
||||||
|
"echo hi".to_string(),
|
||||||
|
]
|
||||||
|
} else {
|
||||||
|
vec![
|
||||||
|
"/bin/sh".to_string(),
|
||||||
|
"-c".to_string(),
|
||||||
|
"echo hi".to_string(),
|
||||||
|
]
|
||||||
|
},
|
||||||
|
cwd: turn_context.cwd.clone(),
|
||||||
|
timeout_ms: Some(1000),
|
||||||
|
env: HashMap::new(),
|
||||||
|
with_escalated_permissions: Some(true),
|
||||||
|
justification: Some("test".to_string()),
|
||||||
|
};
|
||||||
|
|
||||||
|
let params2 = ExecParams {
|
||||||
|
with_escalated_permissions: Some(false),
|
||||||
|
..params.clone()
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut turn_diff_tracker = TurnDiffTracker::new();
|
||||||
|
|
||||||
|
let sub_id = "test-sub".to_string();
|
||||||
|
let call_id = "test-call".to_string();
|
||||||
|
|
||||||
|
let resp = handle_container_exec_with_params(
|
||||||
|
params,
|
||||||
|
&session,
|
||||||
|
&turn_context,
|
||||||
|
&mut turn_diff_tracker,
|
||||||
|
sub_id,
|
||||||
|
call_id,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
let ResponseInputItem::FunctionCallOutput { output, .. } = resp else {
|
||||||
|
panic!("expected FunctionCallOutput");
|
||||||
|
};
|
||||||
|
|
||||||
|
let expected = format!(
|
||||||
|
"approval policy is {policy:?}; reject command — you should not ask for escalated permissions if the approval policy is {policy:?}",
|
||||||
|
policy = turn_context.approval_policy
|
||||||
|
);
|
||||||
|
|
||||||
|
pretty_assertions::assert_eq!(output.content, expected);
|
||||||
|
|
||||||
|
// Now retry the same command WITHOUT escalated permissions; should succeed.
|
||||||
|
// Force DangerFullAccess to avoid platform sandbox dependencies in tests.
|
||||||
|
turn_context.sandbox_policy = SandboxPolicy::DangerFullAccess;
|
||||||
|
|
||||||
|
let resp2 = handle_container_exec_with_params(
|
||||||
|
params2,
|
||||||
|
&session,
|
||||||
|
&turn_context,
|
||||||
|
&mut turn_diff_tracker,
|
||||||
|
"test-sub".to_string(),
|
||||||
|
"test-call-2".to_string(),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
let ResponseInputItem::FunctionCallOutput { output, .. } = resp2 else {
|
||||||
|
panic!("expected FunctionCallOutput on retry");
|
||||||
|
};
|
||||||
|
|
||||||
|
#[derive(Deserialize, PartialEq, Eq, Debug)]
|
||||||
|
struct ResponseExecMetadata {
|
||||||
|
exit_code: i32,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
struct ResponseExecOutput {
|
||||||
|
output: String,
|
||||||
|
metadata: ResponseExecMetadata,
|
||||||
|
}
|
||||||
|
|
||||||
|
let exec_output: ResponseExecOutput =
|
||||||
|
serde_json::from_str(&output.content).expect("valid exec output json");
|
||||||
|
|
||||||
|
pretty_assertions::assert_eq!(exec_output.metadata, ResponseExecMetadata { exit_code: 0 });
|
||||||
|
assert!(exec_output.output.contains("hi"));
|
||||||
|
pretty_assertions::assert_eq!(output.success, Some(true));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -45,7 +45,7 @@ const AGGREGATE_BUFFER_INITIAL_CAPACITY: usize = 8 * 1024; // 8 KiB
|
|||||||
/// Aggregation still collects full output; only the live event stream is capped.
|
/// Aggregation still collects full output; only the live event stream is capped.
|
||||||
pub(crate) const MAX_EXEC_OUTPUT_DELTAS_PER_CALL: usize = 10_000;
|
pub(crate) const MAX_EXEC_OUTPUT_DELTAS_PER_CALL: usize = 10_000;
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Clone, Debug)]
|
||||||
pub struct ExecParams {
|
pub struct ExecParams {
|
||||||
pub command: Vec<String>,
|
pub command: Vec<String>,
|
||||||
pub cwd: PathBuf,
|
pub cwd: PathBuf,
|
||||||
|
|||||||
@@ -7,8 +7,6 @@ use std::collections::HashMap;
|
|||||||
|
|
||||||
use crate::model_family::ModelFamily;
|
use crate::model_family::ModelFamily;
|
||||||
use crate::plan_tool::PLAN_TOOL;
|
use crate::plan_tool::PLAN_TOOL;
|
||||||
use crate::protocol::AskForApproval;
|
|
||||||
use crate::protocol::SandboxPolicy;
|
|
||||||
use crate::tool_apply_patch::ApplyPatchToolType;
|
use crate::tool_apply_patch::ApplyPatchToolType;
|
||||||
use crate::tool_apply_patch::create_apply_patch_freeform_tool;
|
use crate::tool_apply_patch::create_apply_patch_freeform_tool;
|
||||||
use crate::tool_apply_patch::create_apply_patch_json_tool;
|
use crate::tool_apply_patch::create_apply_patch_json_tool;
|
||||||
@@ -57,10 +55,9 @@ pub(crate) enum OpenAiTool {
|
|||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub enum ConfigShellToolType {
|
pub enum ConfigShellToolType {
|
||||||
DefaultShell,
|
Default,
|
||||||
ShellWithRequest { sandbox_policy: SandboxPolicy },
|
Local,
|
||||||
LocalShell,
|
Streamable,
|
||||||
StreamableShell,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
@@ -75,8 +72,6 @@ pub(crate) struct ToolsConfig {
|
|||||||
|
|
||||||
pub(crate) struct ToolsConfigParams<'a> {
|
pub(crate) struct ToolsConfigParams<'a> {
|
||||||
pub(crate) model_family: &'a ModelFamily,
|
pub(crate) model_family: &'a ModelFamily,
|
||||||
pub(crate) approval_policy: AskForApproval,
|
|
||||||
pub(crate) sandbox_policy: SandboxPolicy,
|
|
||||||
pub(crate) include_plan_tool: bool,
|
pub(crate) include_plan_tool: bool,
|
||||||
pub(crate) include_apply_patch_tool: bool,
|
pub(crate) include_apply_patch_tool: bool,
|
||||||
pub(crate) include_web_search_request: bool,
|
pub(crate) include_web_search_request: bool,
|
||||||
@@ -89,8 +84,6 @@ impl ToolsConfig {
|
|||||||
pub fn new(params: &ToolsConfigParams) -> Self {
|
pub fn new(params: &ToolsConfigParams) -> Self {
|
||||||
let ToolsConfigParams {
|
let ToolsConfigParams {
|
||||||
model_family,
|
model_family,
|
||||||
approval_policy,
|
|
||||||
sandbox_policy,
|
|
||||||
include_plan_tool,
|
include_plan_tool,
|
||||||
include_apply_patch_tool,
|
include_apply_patch_tool,
|
||||||
include_web_search_request,
|
include_web_search_request,
|
||||||
@@ -98,18 +91,13 @@ impl ToolsConfig {
|
|||||||
include_view_image_tool,
|
include_view_image_tool,
|
||||||
experimental_unified_exec_tool,
|
experimental_unified_exec_tool,
|
||||||
} = params;
|
} = params;
|
||||||
let mut shell_type = if *use_streamable_shell_tool {
|
let shell_type = if *use_streamable_shell_tool {
|
||||||
ConfigShellToolType::StreamableShell
|
ConfigShellToolType::Streamable
|
||||||
} else if model_family.uses_local_shell_tool {
|
} else if model_family.uses_local_shell_tool {
|
||||||
ConfigShellToolType::LocalShell
|
ConfigShellToolType::Local
|
||||||
} else {
|
} else {
|
||||||
ConfigShellToolType::DefaultShell
|
ConfigShellToolType::Default
|
||||||
};
|
};
|
||||||
if matches!(approval_policy, AskForApproval::OnRequest) && !use_streamable_shell_tool {
|
|
||||||
shell_type = ConfigShellToolType::ShellWithRequest {
|
|
||||||
sandbox_policy: sandbox_policy.clone(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let apply_patch_tool_type = match model_family.apply_patch_tool_type {
|
let apply_patch_tool_type = match model_family.apply_patch_tool_type {
|
||||||
Some(ApplyPatchToolType::Freeform) => Some(ApplyPatchToolType::Freeform),
|
Some(ApplyPatchToolType::Freeform) => Some(ApplyPatchToolType::Freeform),
|
||||||
@@ -170,40 +158,6 @@ pub(crate) enum JsonSchema {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
fn create_shell_tool() -> OpenAiTool {
|
|
||||||
let mut properties = BTreeMap::new();
|
|
||||||
properties.insert(
|
|
||||||
"command".to_string(),
|
|
||||||
JsonSchema::Array {
|
|
||||||
items: Box::new(JsonSchema::String { description: None }),
|
|
||||||
description: Some("The command to execute".to_string()),
|
|
||||||
},
|
|
||||||
);
|
|
||||||
properties.insert(
|
|
||||||
"workdir".to_string(),
|
|
||||||
JsonSchema::String {
|
|
||||||
description: Some("The working directory to execute the command in".to_string()),
|
|
||||||
},
|
|
||||||
);
|
|
||||||
properties.insert(
|
|
||||||
"timeout_ms".to_string(),
|
|
||||||
JsonSchema::Number {
|
|
||||||
description: Some("The timeout for the command in milliseconds".to_string()),
|
|
||||||
},
|
|
||||||
);
|
|
||||||
|
|
||||||
OpenAiTool::Function(ResponsesApiTool {
|
|
||||||
name: "shell".to_string(),
|
|
||||||
description: "Runs a shell command and returns its output".to_string(),
|
|
||||||
strict: false,
|
|
||||||
parameters: JsonSchema::Object {
|
|
||||||
properties,
|
|
||||||
required: Some(vec!["command".to_string()]),
|
|
||||||
additional_properties: Some(false),
|
|
||||||
},
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn create_unified_exec_tool() -> OpenAiTool {
|
fn create_unified_exec_tool() -> OpenAiTool {
|
||||||
let mut properties = BTreeMap::new();
|
let mut properties = BTreeMap::new();
|
||||||
properties.insert(
|
properties.insert(
|
||||||
@@ -251,7 +205,7 @@ fn create_unified_exec_tool() -> OpenAiTool {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn create_shell_tool_for_sandbox(sandbox_policy: &SandboxPolicy) -> OpenAiTool {
|
fn create_shell_tool() -> OpenAiTool {
|
||||||
let mut properties = BTreeMap::new();
|
let mut properties = BTreeMap::new();
|
||||||
properties.insert(
|
properties.insert(
|
||||||
"command".to_string(),
|
"command".to_string(),
|
||||||
@@ -273,20 +227,18 @@ fn create_shell_tool_for_sandbox(sandbox_policy: &SandboxPolicy) -> OpenAiTool {
|
|||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
|
||||||
if !matches!(sandbox_policy, SandboxPolicy::DangerFullAccess) {
|
properties.insert(
|
||||||
properties.insert(
|
|
||||||
"with_escalated_permissions".to_string(),
|
"with_escalated_permissions".to_string(),
|
||||||
JsonSchema::Boolean {
|
JsonSchema::Boolean {
|
||||||
description: Some("Whether to request escalated permissions. Set to true if command needs to be run without sandbox restrictions".to_string()),
|
description: Some("Whether to request escalated permissions. Set to true if command needs to be run without sandbox restrictions".to_string()),
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
properties.insert(
|
properties.insert(
|
||||||
"justification".to_string(),
|
"justification".to_string(),
|
||||||
JsonSchema::String {
|
JsonSchema::String {
|
||||||
description: Some("Only set if with_escalated_permissions is true. 1-sentence explanation of why we want to run this command.".to_string()),
|
description: Some("Only set if with_escalated_permissions is true. 1-sentence explanation of why we want to run this command.".to_string()),
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
}
|
|
||||||
|
|
||||||
OpenAiTool::Function(ResponsesApiTool {
|
OpenAiTool::Function(ResponsesApiTool {
|
||||||
name: "shell".to_string(),
|
name: "shell".to_string(),
|
||||||
@@ -537,16 +489,13 @@ pub(crate) fn get_openai_tools(
|
|||||||
tools.push(create_unified_exec_tool());
|
tools.push(create_unified_exec_tool());
|
||||||
} else {
|
} else {
|
||||||
match &config.shell_type {
|
match &config.shell_type {
|
||||||
ConfigShellToolType::DefaultShell => {
|
ConfigShellToolType::Default => {
|
||||||
tools.push(create_shell_tool());
|
tools.push(create_shell_tool());
|
||||||
}
|
}
|
||||||
ConfigShellToolType::ShellWithRequest { sandbox_policy } => {
|
ConfigShellToolType::Local => {
|
||||||
tools.push(create_shell_tool_for_sandbox(sandbox_policy));
|
|
||||||
}
|
|
||||||
ConfigShellToolType::LocalShell => {
|
|
||||||
tools.push(OpenAiTool::LocalShell {});
|
tools.push(OpenAiTool::LocalShell {});
|
||||||
}
|
}
|
||||||
ConfigShellToolType::StreamableShell => {
|
ConfigShellToolType::Streamable => {
|
||||||
tools.push(OpenAiTool::Function(
|
tools.push(OpenAiTool::Function(
|
||||||
crate::exec_command::create_exec_command_tool_for_responses_api(),
|
crate::exec_command::create_exec_command_tool_for_responses_api(),
|
||||||
));
|
));
|
||||||
@@ -636,8 +585,6 @@ mod tests {
|
|||||||
.expect("codex-mini-latest should be a valid model family");
|
.expect("codex-mini-latest should be a valid model family");
|
||||||
let config = ToolsConfig::new(&ToolsConfigParams {
|
let config = ToolsConfig::new(&ToolsConfigParams {
|
||||||
model_family: &model_family,
|
model_family: &model_family,
|
||||||
approval_policy: AskForApproval::Never,
|
|
||||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
|
||||||
include_plan_tool: true,
|
include_plan_tool: true,
|
||||||
include_apply_patch_tool: false,
|
include_apply_patch_tool: false,
|
||||||
include_web_search_request: true,
|
include_web_search_request: true,
|
||||||
@@ -658,8 +605,6 @@ mod tests {
|
|||||||
let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
|
let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
|
||||||
let config = ToolsConfig::new(&ToolsConfigParams {
|
let config = ToolsConfig::new(&ToolsConfigParams {
|
||||||
model_family: &model_family,
|
model_family: &model_family,
|
||||||
approval_policy: AskForApproval::Never,
|
|
||||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
|
||||||
include_plan_tool: true,
|
include_plan_tool: true,
|
||||||
include_apply_patch_tool: false,
|
include_apply_patch_tool: false,
|
||||||
include_web_search_request: true,
|
include_web_search_request: true,
|
||||||
@@ -680,8 +625,6 @@ mod tests {
|
|||||||
let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
|
let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
|
||||||
let config = ToolsConfig::new(&ToolsConfigParams {
|
let config = ToolsConfig::new(&ToolsConfigParams {
|
||||||
model_family: &model_family,
|
model_family: &model_family,
|
||||||
approval_policy: AskForApproval::Never,
|
|
||||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
|
||||||
include_plan_tool: false,
|
include_plan_tool: false,
|
||||||
include_apply_patch_tool: false,
|
include_apply_patch_tool: false,
|
||||||
include_web_search_request: true,
|
include_web_search_request: true,
|
||||||
@@ -786,8 +729,6 @@ mod tests {
|
|||||||
let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
|
let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
|
||||||
let config = ToolsConfig::new(&ToolsConfigParams {
|
let config = ToolsConfig::new(&ToolsConfigParams {
|
||||||
model_family: &model_family,
|
model_family: &model_family,
|
||||||
approval_policy: AskForApproval::Never,
|
|
||||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
|
||||||
include_plan_tool: false,
|
include_plan_tool: false,
|
||||||
include_apply_patch_tool: false,
|
include_apply_patch_tool: false,
|
||||||
include_web_search_request: false,
|
include_web_search_request: false,
|
||||||
@@ -864,8 +805,6 @@ mod tests {
|
|||||||
let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
|
let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
|
||||||
let config = ToolsConfig::new(&ToolsConfigParams {
|
let config = ToolsConfig::new(&ToolsConfigParams {
|
||||||
model_family: &model_family,
|
model_family: &model_family,
|
||||||
approval_policy: AskForApproval::Never,
|
|
||||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
|
||||||
include_plan_tool: false,
|
include_plan_tool: false,
|
||||||
include_apply_patch_tool: false,
|
include_apply_patch_tool: false,
|
||||||
include_web_search_request: true,
|
include_web_search_request: true,
|
||||||
@@ -927,8 +866,6 @@ mod tests {
|
|||||||
let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
|
let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
|
||||||
let config = ToolsConfig::new(&ToolsConfigParams {
|
let config = ToolsConfig::new(&ToolsConfigParams {
|
||||||
model_family: &model_family,
|
model_family: &model_family,
|
||||||
approval_policy: AskForApproval::Never,
|
|
||||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
|
||||||
include_plan_tool: false,
|
include_plan_tool: false,
|
||||||
include_apply_patch_tool: false,
|
include_apply_patch_tool: false,
|
||||||
include_web_search_request: true,
|
include_web_search_request: true,
|
||||||
@@ -985,8 +922,6 @@ mod tests {
|
|||||||
let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
|
let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
|
||||||
let config = ToolsConfig::new(&ToolsConfigParams {
|
let config = ToolsConfig::new(&ToolsConfigParams {
|
||||||
model_family: &model_family,
|
model_family: &model_family,
|
||||||
approval_policy: AskForApproval::Never,
|
|
||||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
|
||||||
include_plan_tool: false,
|
include_plan_tool: false,
|
||||||
include_apply_patch_tool: false,
|
include_apply_patch_tool: false,
|
||||||
include_web_search_request: true,
|
include_web_search_request: true,
|
||||||
@@ -1046,8 +981,6 @@ mod tests {
|
|||||||
let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
|
let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
|
||||||
let config = ToolsConfig::new(&ToolsConfigParams {
|
let config = ToolsConfig::new(&ToolsConfigParams {
|
||||||
model_family: &model_family,
|
model_family: &model_family,
|
||||||
approval_policy: AskForApproval::Never,
|
|
||||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
|
||||||
include_plan_tool: false,
|
include_plan_tool: false,
|
||||||
include_apply_patch_tool: false,
|
include_apply_patch_tool: false,
|
||||||
include_web_search_request: true,
|
include_web_search_request: true,
|
||||||
@@ -1100,14 +1033,8 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_shell_tool_for_sandbox_workspace_write() {
|
fn test_shell_tool() {
|
||||||
let sandbox_policy = SandboxPolicy::WorkspaceWrite {
|
let tool = super::create_shell_tool();
|
||||||
writable_roots: vec!["workspace".into()],
|
|
||||||
network_access: false,
|
|
||||||
exclude_tmpdir_env_var: false,
|
|
||||||
exclude_slash_tmp: false,
|
|
||||||
};
|
|
||||||
let tool = super::create_shell_tool_for_sandbox(&sandbox_policy);
|
|
||||||
let OpenAiTool::Function(ResponsesApiTool {
|
let OpenAiTool::Function(ResponsesApiTool {
|
||||||
description, name, ..
|
description, name, ..
|
||||||
}) = &tool
|
}) = &tool
|
||||||
@@ -1119,33 +1046,4 @@ mod tests {
|
|||||||
let expected = "Runs a shell command and returns its output.";
|
let expected = "Runs a shell command and returns its output.";
|
||||||
assert_eq!(description, expected);
|
assert_eq!(description, expected);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_shell_tool_for_sandbox_readonly() {
|
|
||||||
let tool = super::create_shell_tool_for_sandbox(&SandboxPolicy::ReadOnly);
|
|
||||||
let OpenAiTool::Function(ResponsesApiTool {
|
|
||||||
description, name, ..
|
|
||||||
}) = &tool
|
|
||||||
else {
|
|
||||||
panic!("expected function tool");
|
|
||||||
};
|
|
||||||
assert_eq!(name, "shell");
|
|
||||||
|
|
||||||
let expected = "Runs a shell command and returns its output.";
|
|
||||||
assert_eq!(description, expected);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_shell_tool_for_sandbox_danger_full_access() {
|
|
||||||
let tool = super::create_shell_tool_for_sandbox(&SandboxPolicy::DangerFullAccess);
|
|
||||||
let OpenAiTool::Function(ResponsesApiTool {
|
|
||||||
description, name, ..
|
|
||||||
}) = &tool
|
|
||||||
else {
|
|
||||||
panic!("expected function tool");
|
|
||||||
};
|
|
||||||
assert_eq!(name, "shell");
|
|
||||||
|
|
||||||
assert_eq!(description, "Runs a shell command and returns its output.");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user