## Summary

A split-up PR of #1763, stacked on top of a tools refactor (#1858) to make the change clearer. From the previous summary:

> Let's try something new: tell the model about the sandbox, and let it decide when it will need to break the sandbox. Some local testing suggests that it works pretty well with zero iteration on the prompt!

## Testing

- [x] Added unit tests
- [x] Tested locally and it appears to work smoothly!
501 lines
17 KiB
Rust
501 lines
17 KiB
Rust
use serde::Deserialize;
|
||
use serde::Serialize;
|
||
use serde_json::json;
|
||
use std::collections::BTreeMap;
|
||
use std::collections::HashMap;
|
||
|
||
use crate::model_family::ModelFamily;
|
||
use crate::plan_tool::PLAN_TOOL;
|
||
use crate::protocol::AskForApproval;
|
||
use crate::protocol::SandboxPolicy;
|
||
|
||
/// Definition of a function-style tool: a name, a human-readable description,
/// a `strict` flag and the JSON schema of the arguments it accepts.
///
/// This is the payload carried by [`OpenAiTool::Function`]; it derives
/// `Serialize` so it can be emitted as JSON.
#[derive(Debug, Clone, Serialize, PartialEq)]
pub struct ResponsesApiTool {
    /// Name under which the tool is exposed.
    pub(crate) name: String,
    /// Free-form description of what the tool does.
    pub(crate) description: String,
    /// TODO: Validation. When strict is set to true, the JSON schema,
    /// `required` and `additional_properties` must be present. All fields in
    /// `properties` must be present in `required`.
    pub(crate) strict: bool,
    /// Schema describing the arguments the tool accepts.
    pub(crate) parameters: JsonSchema,
}
|
||
|
||
/// When serialized as JSON, this produces a valid "Tool" in the OpenAI
|
||
/// Responses API.
|
||
#[derive(Debug, Clone, Serialize, PartialEq)]
|
||
#[serde(tag = "type")]
|
||
pub(crate) enum OpenAiTool {
|
||
#[serde(rename = "function")]
|
||
Function(ResponsesApiTool),
|
||
#[serde(rename = "local_shell")]
|
||
LocalShell {},
|
||
}
|
||
|
||
/// Selects which flavour of shell tool `get_openai_tools` puts in the tool
/// list.
#[derive(Debug, Clone)]
pub enum ConfigShellToolType {
    /// The plain `shell` function tool built by `create_shell_tool`.
    DefaultShell,
    /// The `shell` function tool built by `create_shell_tool_for_sandbox`,
    /// whose description and parameters depend on the given sandbox policy.
    ShellWithRequest { sandbox_policy: SandboxPolicy },
    /// The `local_shell` tool (`OpenAiTool::LocalShell`).
    LocalShell,
}
|
||
|
||
/// Settings that determine which built-in tools `get_openai_tools` emits.
#[derive(Debug, Clone)]
pub struct ToolsConfig {
    /// Which shell tool variant to expose.
    pub shell_type: ConfigShellToolType,
    /// Whether `PLAN_TOOL` is added to the tool list.
    pub plan_tool: bool,
}
|
||
|
||
impl ToolsConfig {
|
||
pub fn new(
|
||
model_family: &ModelFamily,
|
||
approval_policy: AskForApproval,
|
||
sandbox_policy: SandboxPolicy,
|
||
include_plan_tool: bool,
|
||
) -> Self {
|
||
let mut shell_type = if model_family.uses_local_shell_tool {
|
||
ConfigShellToolType::LocalShell
|
||
} else {
|
||
ConfigShellToolType::DefaultShell
|
||
};
|
||
if matches!(approval_policy, AskForApproval::OnRequest) {
|
||
shell_type = ConfigShellToolType::ShellWithRequest {
|
||
sandbox_policy: sandbox_policy.clone(),
|
||
}
|
||
}
|
||
|
||
Self {
|
||
shell_type,
|
||
plan_tool: include_plan_tool,
|
||
}
|
||
}
|
||
}
|
||
|
||
/// Generic JSON‑Schema subset needed for our tool definitions
|
||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||
#[serde(tag = "type", rename_all = "lowercase")]
|
||
pub(crate) enum JsonSchema {
|
||
Boolean {
|
||
#[serde(skip_serializing_if = "Option::is_none")]
|
||
description: Option<String>,
|
||
},
|
||
String {
|
||
#[serde(skip_serializing_if = "Option::is_none")]
|
||
description: Option<String>,
|
||
},
|
||
Number {
|
||
#[serde(skip_serializing_if = "Option::is_none")]
|
||
description: Option<String>,
|
||
},
|
||
Array {
|
||
items: Box<JsonSchema>,
|
||
|
||
#[serde(skip_serializing_if = "Option::is_none")]
|
||
description: Option<String>,
|
||
},
|
||
Object {
|
||
properties: BTreeMap<String, JsonSchema>,
|
||
#[serde(skip_serializing_if = "Option::is_none")]
|
||
required: Option<Vec<String>>,
|
||
#[serde(
|
||
rename = "additionalProperties",
|
||
skip_serializing_if = "Option::is_none"
|
||
)]
|
||
additional_properties: Option<bool>,
|
||
},
|
||
}
|
||
|
||
fn create_shell_tool() -> OpenAiTool {
|
||
let mut properties = BTreeMap::new();
|
||
properties.insert(
|
||
"command".to_string(),
|
||
JsonSchema::Array {
|
||
items: Box::new(JsonSchema::String { description: None }),
|
||
description: None,
|
||
},
|
||
);
|
||
properties.insert(
|
||
"workdir".to_string(),
|
||
JsonSchema::String { description: None },
|
||
);
|
||
properties.insert(
|
||
"timeout".to_string(),
|
||
JsonSchema::Number { description: None },
|
||
);
|
||
|
||
OpenAiTool::Function(ResponsesApiTool {
|
||
name: "shell".to_string(),
|
||
description: "Runs a shell command and returns its output".to_string(),
|
||
strict: false,
|
||
parameters: JsonSchema::Object {
|
||
properties,
|
||
required: Some(vec!["command".to_string()]),
|
||
additional_properties: Some(false),
|
||
},
|
||
})
|
||
}
|
||
|
||
fn create_shell_tool_for_sandbox(sandbox_policy: &SandboxPolicy) -> OpenAiTool {
|
||
let mut properties = BTreeMap::new();
|
||
properties.insert(
|
||
"command".to_string(),
|
||
JsonSchema::Array {
|
||
items: Box::new(JsonSchema::String { description: None }),
|
||
description: Some("The command to execute".to_string()),
|
||
},
|
||
);
|
||
properties.insert(
|
||
"workdir".to_string(),
|
||
JsonSchema::String {
|
||
description: Some("The working directory to execute the command in".to_string()),
|
||
},
|
||
);
|
||
properties.insert(
|
||
"timeout".to_string(),
|
||
JsonSchema::Number {
|
||
description: Some("The timeout for the command in milliseconds".to_string()),
|
||
},
|
||
);
|
||
|
||
if matches!(sandbox_policy, SandboxPolicy::WorkspaceWrite { .. }) {
|
||
properties.insert(
|
||
"with_escalated_permissions".to_string(),
|
||
JsonSchema::Boolean {
|
||
description: Some("Whether to request escalated permissions. Set to true if command needs to be run without sandbox restrictions".to_string()),
|
||
},
|
||
);
|
||
properties.insert(
|
||
"justification".to_string(),
|
||
JsonSchema::String {
|
||
description: Some("Only set if ask_for_escalated_permissions is true. 1-sentence explanation of why we want to run this command.".to_string()),
|
||
},
|
||
);
|
||
}
|
||
|
||
let description = match sandbox_policy {
|
||
SandboxPolicy::WorkspaceWrite {
|
||
network_access,
|
||
..
|
||
} => {
|
||
format!(
|
||
r#"
|
||
The shell tool is used to execute shell commands.
|
||
- When invoking the shell tool, your call will be running in a landlock sandbox, and some shell commands will require escalated privileges:
|
||
- Types of actions that require escalated privileges:
|
||
- Reading files outside the current directory
|
||
- Writing files outside the current directory, and protected folders like .git or .env{}
|
||
- Examples of commands that require escalated privileges:
|
||
- git commit
|
||
- npm install or pnpm install
|
||
- cargo build
|
||
- cargo test
|
||
- When invoking a command that will require escalated privileges:
|
||
- Provide the with_escalated_permissions parameter with the boolean value true
|
||
- Include a short, 1 sentence explanation for why we need to run with_escalated_permissions in the justification parameter."#,
|
||
if !network_access {
|
||
"\n - Commands that require network access\n"
|
||
} else {
|
||
""
|
||
}
|
||
)
|
||
}
|
||
SandboxPolicy::DangerFullAccess => {
|
||
"Runs a shell command and returns its output.".to_string()
|
||
}
|
||
SandboxPolicy::ReadOnly => {
|
||
r#"
|
||
The shell tool is used to execute shell commands.
|
||
- When invoking the shell tool, your call will be running in a landlock sandbox, and some shell commands (including apply_patch) will require escalated permissions:
|
||
- Types of actions that require escalated privileges:
|
||
- Reading files outside the current directory
|
||
- Writing files
|
||
- Applying patches
|
||
- Examples of commands that require escalated privileges:
|
||
- apply_patch
|
||
- git commit
|
||
- npm install or pnpm install
|
||
- cargo build
|
||
- cargo test
|
||
- When invoking a command that will require escalated privileges:
|
||
- Provide the with_escalated_permissions parameter with the boolean value true
|
||
- Include a short, 1 sentence explanation for why we need to run with_escalated_permissions in the justification parameter"#.to_string()
|
||
}
|
||
};
|
||
|
||
OpenAiTool::Function(ResponsesApiTool {
|
||
name: "shell".to_string(),
|
||
description,
|
||
strict: false,
|
||
parameters: JsonSchema::Object {
|
||
properties,
|
||
required: Some(vec!["command".to_string()]),
|
||
additional_properties: Some(false),
|
||
},
|
||
})
|
||
}
|
||
|
||
/// Returns JSON values that are compatible with Function Calling in the
|
||
/// Responses API:
|
||
/// https://platform.openai.com/docs/guides/function-calling?api-mode=responses
|
||
pub(crate) fn create_tools_json_for_responses_api(
|
||
tools: &Vec<OpenAiTool>,
|
||
) -> crate::error::Result<Vec<serde_json::Value>> {
|
||
let mut tools_json = Vec::new();
|
||
|
||
for tool in tools {
|
||
tools_json.push(serde_json::to_value(tool)?);
|
||
}
|
||
|
||
Ok(tools_json)
|
||
}
|
||
|
||
/// Returns JSON values that are compatible with Function Calling in the
|
||
/// Chat Completions API:
|
||
/// https://platform.openai.com/docs/guides/function-calling?api-mode=chat
|
||
pub(crate) fn create_tools_json_for_chat_completions_api(
|
||
tools: &Vec<OpenAiTool>,
|
||
) -> crate::error::Result<Vec<serde_json::Value>> {
|
||
// We start with the JSON for the Responses API and than rewrite it to match
|
||
// the chat completions tool call format.
|
||
let responses_api_tools_json = create_tools_json_for_responses_api(tools)?;
|
||
let tools_json = responses_api_tools_json
|
||
.into_iter()
|
||
.filter_map(|mut tool| {
|
||
if tool.get("type") != Some(&serde_json::Value::String("function".to_string())) {
|
||
return None;
|
||
}
|
||
|
||
if let Some(map) = tool.as_object_mut() {
|
||
// Remove "type" field as it is not needed in chat completions.
|
||
map.remove("type");
|
||
Some(json!({
|
||
"type": "function",
|
||
"function": map,
|
||
}))
|
||
} else {
|
||
None
|
||
}
|
||
})
|
||
.collect::<Vec<serde_json::Value>>();
|
||
Ok(tools_json)
|
||
}
|
||
|
||
pub(crate) fn mcp_tool_to_openai_tool(
|
||
fully_qualified_name: String,
|
||
tool: mcp_types::Tool,
|
||
) -> Result<ResponsesApiTool, serde_json::Error> {
|
||
let mcp_types::Tool {
|
||
description,
|
||
mut input_schema,
|
||
..
|
||
} = tool;
|
||
|
||
// OpenAI models mandate the "properties" field in the schema. The Agents
|
||
// SDK fixed this by inserting an empty object for "properties" if it is not
|
||
// already present https://github.com/openai/openai-agents-python/issues/449
|
||
// so here we do the same.
|
||
if input_schema.properties.is_none() {
|
||
input_schema.properties = Some(serde_json::Value::Object(serde_json::Map::new()));
|
||
}
|
||
|
||
let serialized_input_schema = serde_json::to_value(input_schema)?;
|
||
let input_schema = serde_json::from_value::<JsonSchema>(serialized_input_schema)?;
|
||
|
||
Ok(ResponsesApiTool {
|
||
name: fully_qualified_name,
|
||
description: description.unwrap_or_default(),
|
||
strict: false,
|
||
parameters: input_schema,
|
||
})
|
||
}
|
||
|
||
/// Returns a list of OpenAiTools based on the provided config and MCP tools.
|
||
/// Note that the keys of mcp_tools should be fully qualified names. See
|
||
/// [`McpConnectionManager`] for more details.
|
||
pub(crate) fn get_openai_tools(
|
||
config: &ToolsConfig,
|
||
mcp_tools: Option<HashMap<String, mcp_types::Tool>>,
|
||
) -> Vec<OpenAiTool> {
|
||
let mut tools: Vec<OpenAiTool> = Vec::new();
|
||
|
||
match &config.shell_type {
|
||
ConfigShellToolType::DefaultShell => {
|
||
tools.push(create_shell_tool());
|
||
}
|
||
ConfigShellToolType::ShellWithRequest { sandbox_policy } => {
|
||
tools.push(create_shell_tool_for_sandbox(sandbox_policy));
|
||
}
|
||
ConfigShellToolType::LocalShell => {
|
||
tools.push(OpenAiTool::LocalShell {});
|
||
}
|
||
}
|
||
|
||
if config.plan_tool {
|
||
tools.push(PLAN_TOOL.clone());
|
||
}
|
||
|
||
if let Some(mcp_tools) = mcp_tools {
|
||
for (name, tool) in mcp_tools {
|
||
match mcp_tool_to_openai_tool(name.clone(), tool.clone()) {
|
||
Ok(converted_tool) => tools.push(OpenAiTool::Function(converted_tool)),
|
||
Err(e) => {
|
||
tracing::error!("Failed to convert {name:?} MCP tool to OpenAI tool: {e:?}");
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
tools
|
||
}
|
||
|
||
#[cfg(test)]
#[allow(clippy::expect_used)]
mod tests {
    use crate::model_family::find_family_for_model;
    use mcp_types::ToolInputSchema;

    use super::*;

    /// Asserts that `tools` carries exactly `expected_names`, in order.
    /// The `local_shell` tool has no name field, so it is reported as
    /// `"local_shell"`.
    fn assert_eq_tool_names(tools: &[OpenAiTool], expected_names: &[&str]) {
        let tool_names: Vec<&str> = tools
            .iter()
            .map(|tool| match tool {
                OpenAiTool::Function(ResponsesApiTool { name, .. }) => name.as_str(),
                OpenAiTool::LocalShell {} => "local_shell",
            })
            .collect();

        assert_eq!(
            tool_names.len(),
            expected_names.len(),
            "tool_name mismatch, {tool_names:?}, {expected_names:?}",
        );
        for (name, expected_name) in tool_names.iter().zip(expected_names.iter()) {
            assert_eq!(
                name, expected_name,
                "tool_name mismatch, {name:?}, {expected_name:?}"
            );
        }
    }

    /// A model family that uses the local shell tool gets `local_shell`
    /// followed by the plan tool.
    #[test]
    fn test_get_openai_tools() {
        let family = find_family_for_model("codex-mini-latest")
            .expect("codex-mini-latest should be a valid model family");
        let config = ToolsConfig::new(
            &family,
            AskForApproval::Never,
            SandboxPolicy::ReadOnly,
            true,
        );

        let tools = get_openai_tools(&config, Some(HashMap::new()));

        assert_eq_tool_names(&tools, &["local_shell", "update_plan"]);
    }

    /// Other model families get the default `shell` function tool.
    #[test]
    fn test_get_openai_tools_default_shell() {
        let family = find_family_for_model("o3").expect("o3 should be a valid model family");
        let config = ToolsConfig::new(
            &family,
            AskForApproval::Never,
            SandboxPolicy::ReadOnly,
            true,
        );

        let tools = get_openai_tools(&config, Some(HashMap::new()));

        assert_eq_tool_names(&tools, &["shell", "update_plan"]);
    }

    /// An MCP tool is appended after the shell tool and its input schema is
    /// converted into the `JsonSchema` subset, including nested objects.
    #[test]
    fn test_get_openai_tools_mcp_tools() {
        let family = find_family_for_model("o3").expect("o3 should be a valid model family");
        let config = ToolsConfig::new(
            &family,
            AskForApproval::Never,
            SandboxPolicy::ReadOnly,
            false,
        );

        let mcp_tool = mcp_types::Tool {
            name: "do_something_cool".to_string(),
            input_schema: ToolInputSchema {
                properties: Some(serde_json::json!({
                    "string_argument": {
                        "type": "string",
                    },
                    "number_argument": {
                        "type": "number",
                    },
                    "object_argument": {
                        "type": "object",
                        "properties": {
                            "string_property": { "type": "string" },
                            "number_property": { "type": "number" },
                        },
                        "required": [
                            "string_property",
                            "number_property"
                        ],
                        "additionalProperties": false,
                    },
                })),
                required: None,
                r#type: "object".to_string(),
            },
            output_schema: None,
            title: None,
            annotations: None,
            description: Some("Do something cool".to_string()),
        };

        let tools = get_openai_tools(
            &config,
            Some(HashMap::from([(
                "test_server/do_something_cool".to_string(),
                mcp_tool,
            )])),
        );

        assert_eq_tool_names(&tools, &["shell", "test_server/do_something_cool"]);

        let expected_object_argument = JsonSchema::Object {
            properties: BTreeMap::from([
                (
                    "string_property".to_string(),
                    JsonSchema::String { description: None },
                ),
                (
                    "number_property".to_string(),
                    JsonSchema::Number { description: None },
                ),
            ]),
            required: Some(vec![
                "string_property".to_string(),
                "number_property".to_string(),
            ]),
            additional_properties: Some(false),
        };
        let expected_parameters = JsonSchema::Object {
            properties: BTreeMap::from([
                (
                    "string_argument".to_string(),
                    JsonSchema::String { description: None },
                ),
                (
                    "number_argument".to_string(),
                    JsonSchema::Number { description: None },
                ),
                ("object_argument".to_string(), expected_object_argument),
            ]),
            required: None,
            additional_properties: None,
        };

        assert_eq!(
            tools[1],
            OpenAiTool::Function(ResponsesApiTool {
                name: "test_server/do_something_cool".to_string(),
                parameters: expected_parameters,
                description: "Do something cool".to_string(),
                strict: false,
            })
        );
    }
}
|