Simplify tool implemetations (#4160)

Use Result<String, FunctionCallError> for all tool handling code and
rely on error propagation instead of creating failed items everywhere.
This commit is contained in:
pakrym-oai
2025-09-24 10:27:35 -07:00
committed by GitHub
parent bffdbec2c5
commit addc946d13
7 changed files with 223 additions and 391 deletions

View File

@@ -10,6 +10,7 @@ use std::time::Duration;
use crate::AuthManager;
use crate::client_common::REVIEW_PROMPT;
use crate::event_mapping::map_response_item_to_event_messages;
use crate::function_tool::FunctionCallError;
use crate::review_format::format_review_findings_block;
use crate::user_notification::UserNotifier;
use async_channel::Receiver;
@@ -2232,18 +2233,41 @@ async fn handle_response_item(
..
} => {
info!("FunctionCall: {name}({arguments})");
Some(
handle_function_call(
if let Some((server, tool_name)) = sess.mcp_connection_manager.parse_tool_name(&name) {
let resp = handle_mcp_tool_call(
sess,
sub_id,
call_id.clone(),
server,
tool_name,
arguments,
)
.await;
Some(resp)
} else {
let result = handle_function_call(
sess,
turn_context,
turn_diff_tracker,
sub_id.to_string(),
name,
arguments,
call_id,
call_id.clone(),
)
.await,
)
.await;
let output = match result {
Ok(content) => FunctionCallOutputPayload {
content,
success: Some(true),
},
Err(FunctionCallError::RespondToModel(msg)) => FunctionCallOutputPayload {
content: msg,
success: Some(false),
},
};
Some(ResponseInputItem::FunctionCallOutput { call_id, output })
}
}
ResponseItem::LocalShellCall {
id,
@@ -2276,17 +2300,32 @@ async fn handle_response_item(
};
let exec_params = to_exec_params(params, turn_context);
Some(
handle_container_exec_with_params(
{
let result = handle_container_exec_with_params(
exec_params,
sess,
turn_context,
turn_diff_tracker,
sub_id.to_string(),
effective_call_id,
effective_call_id.clone(),
)
.await,
)
.await;
let output = match result {
Ok(content) => FunctionCallOutputPayload {
content,
success: Some(true),
},
Err(FunctionCallError::RespondToModel(msg)) => FunctionCallOutputPayload {
content: msg,
success: Some(false),
},
};
Some(ResponseInputItem::FunctionCallOutput {
call_id: effective_call_id,
output,
})
}
}
ResponseItem::CustomToolCall {
id: _,
@@ -2294,18 +2333,24 @@ async fn handle_response_item(
name,
input,
status: _,
} => Some(
handle_custom_tool_call(
} => {
let result = handle_custom_tool_call(
sess,
turn_context,
turn_diff_tracker,
sub_id.to_string(),
name,
input,
call_id,
call_id.clone(),
)
.await,
),
.await;
let output = match result {
Ok(content) => content,
Err(FunctionCallError::RespondToModel(msg)) => msg,
};
Some(ResponseInputItem::CustomToolCallOutput { call_id, output })
}
ResponseItem::FunctionCallOutput { .. } => {
debug!("unexpected FunctionCallOutput from stream");
None
@@ -2342,22 +2387,17 @@ async fn handle_response_item(
async fn handle_unified_exec_tool_call(
sess: &Session,
call_id: String,
session_id: Option<String>,
arguments: Vec<String>,
timeout_ms: Option<u64>,
) -> ResponseInputItem {
) -> Result<String, FunctionCallError> {
let parsed_session_id = if let Some(session_id) = session_id {
match session_id.parse::<i32>() {
Ok(parsed) => Some(parsed),
Err(output) => {
return ResponseInputItem::FunctionCallOutput {
call_id: call_id.to_string(),
output: FunctionCallOutputPayload {
content: format!("invalid session_id: {session_id} due to error {output}"),
success: Some(false),
},
};
return Err(FunctionCallError::RespondToModel(format!(
"invalid session_id: {session_id} due to error {output:?}"
)));
}
}
} else {
@@ -2370,40 +2410,29 @@ async fn handle_unified_exec_tool_call(
timeout_ms,
};
let result = sess.unified_exec_manager.handle_request(request).await;
let value = sess
.unified_exec_manager
.handle_request(request)
.await
.map_err(|err| {
FunctionCallError::RespondToModel(format!("unified exec failed: {err:?}"))
})?;
let output_payload = match result {
Ok(value) => {
#[derive(Serialize)]
struct SerializedUnifiedExecResult<'a> {
session_id: Option<String>,
output: &'a str,
}
match serde_json::to_string(&SerializedUnifiedExecResult {
session_id: value.session_id.map(|id| id.to_string()),
output: &value.output,
}) {
Ok(serialized) => FunctionCallOutputPayload {
content: serialized,
success: Some(true),
},
Err(err) => FunctionCallOutputPayload {
content: format!("failed to serialize unified exec output: {err}"),
success: Some(false),
},
}
}
Err(err) => FunctionCallOutputPayload {
content: format!("unified exec failed: {err}"),
success: Some(false),
},
};
ResponseInputItem::FunctionCallOutput {
call_id,
output: output_payload,
#[derive(Serialize)]
struct SerializedUnifiedExecResult {
session_id: Option<String>,
output: String,
}
serde_json::to_string(&SerializedUnifiedExecResult {
session_id: value.session_id.map(|id| id.to_string()),
output: value.output,
})
.map_err(|err| {
FunctionCallError::RespondToModel(format!(
"failed to serialize unified exec output: {err:?}"
))
})
}
async fn handle_function_call(
@@ -2414,15 +2443,10 @@ async fn handle_function_call(
name: String,
arguments: String,
call_id: String,
) -> ResponseInputItem {
) -> Result<String, FunctionCallError> {
match name.as_str() {
"container.exec" | "shell" => {
let params = match parse_container_exec_arguments(arguments, turn_context, &call_id) {
Ok(params) => params,
Err(output) => {
return *output;
}
};
let params = parse_container_exec_arguments(arguments, turn_context, &call_id)?;
handle_container_exec_with_params(
params,
sess,
@@ -2443,74 +2467,41 @@ async fn handle_function_call(
timeout_ms: Option<u64>,
}
let args = match serde_json::from_str::<UnifiedExecArgs>(&arguments) {
Ok(args) => args,
Err(err) => {
return ResponseInputItem::FunctionCallOutput {
call_id,
output: FunctionCallOutputPayload {
content: format!("failed to parse function arguments: {err}"),
success: Some(false),
},
};
}
};
let args: UnifiedExecArgs = serde_json::from_str(&arguments).map_err(|err| {
FunctionCallError::RespondToModel(format!(
"failed to parse function arguments: {err:?}"
))
})?;
handle_unified_exec_tool_call(
sess,
call_id,
args.session_id,
args.input,
args.timeout_ms,
)
.await
handle_unified_exec_tool_call(sess, args.session_id, args.input, args.timeout_ms).await
}
"view_image" => {
#[derive(serde::Deserialize)]
struct SeeImageArgs {
path: String,
}
let args = match serde_json::from_str::<SeeImageArgs>(&arguments) {
Ok(a) => a,
Err(e) => {
return ResponseInputItem::FunctionCallOutput {
call_id,
output: FunctionCallOutputPayload {
content: format!("failed to parse function arguments: {e}"),
success: Some(false),
},
};
}
};
let args: SeeImageArgs = serde_json::from_str(&arguments).map_err(|e| {
FunctionCallError::RespondToModel(format!(
"failed to parse function arguments: {e:?}"
))
})?;
let abs = turn_context.resolve_path(Some(args.path));
let output = match sess
.inject_input(vec![InputItem::LocalImage { path: abs }])
sess.inject_input(vec![InputItem::LocalImage { path: abs }])
.await
{
Ok(()) => FunctionCallOutputPayload {
content: "attached local image path".to_string(),
success: Some(true),
},
Err(_) => FunctionCallOutputPayload {
content: "unable to attach image (no active task)".to_string(),
success: Some(false),
},
};
ResponseInputItem::FunctionCallOutput { call_id, output }
.map_err(|_| {
FunctionCallError::RespondToModel(
"unable to attach image (no active task)".to_string(),
)
})?;
Ok("attached local image path".to_string())
}
"apply_patch" => {
let args = match serde_json::from_str::<ApplyPatchToolArgs>(&arguments) {
Ok(a) => a,
Err(e) => {
return ResponseInputItem::FunctionCallOutput {
call_id,
output: FunctionCallOutputPayload {
content: format!("failed to parse function arguments: {e}"),
success: None,
},
};
}
};
let args: ApplyPatchToolArgs = serde_json::from_str(&arguments).map_err(|e| {
FunctionCallError::RespondToModel(format!(
"failed to parse function arguments: {e:?}"
))
})?;
let exec_params = ExecParams {
command: vec!["apply_patch".to_string(), args.input.clone()],
cwd: turn_context.cwd.clone(),
@@ -2532,69 +2523,39 @@ async fn handle_function_call(
"update_plan" => handle_update_plan(sess, arguments, sub_id, call_id).await,
EXEC_COMMAND_TOOL_NAME => {
// TODO(mbolin): Sandbox check.
let exec_params = match serde_json::from_str::<ExecCommandParams>(&arguments) {
Ok(params) => params,
Err(e) => {
return ResponseInputItem::FunctionCallOutput {
call_id,
output: FunctionCallOutputPayload {
content: format!("failed to parse function arguments: {e}"),
success: Some(false),
},
};
}
};
let exec_params: ExecCommandParams = serde_json::from_str(&arguments).map_err(|e| {
FunctionCallError::RespondToModel(format!(
"failed to parse function arguments: {e:?}"
))
})?;
let result = sess
.session_manager
.handle_exec_command_request(exec_params)
.await;
let function_call_output = crate::exec_command::result_into_payload(result);
ResponseInputItem::FunctionCallOutput {
call_id,
output: function_call_output,
match result {
Ok(output) => Ok(output.to_text_output()),
Err(err) => Err(FunctionCallError::RespondToModel(err)),
}
}
WRITE_STDIN_TOOL_NAME => {
let write_stdin_params = match serde_json::from_str::<WriteStdinParams>(&arguments) {
Ok(params) => params,
Err(e) => {
return ResponseInputItem::FunctionCallOutput {
call_id,
output: FunctionCallOutputPayload {
content: format!("failed to parse function arguments: {e}"),
success: Some(false),
},
};
}
};
let write_stdin_params =
serde_json::from_str::<WriteStdinParams>(&arguments).map_err(|e| {
FunctionCallError::RespondToModel(format!(
"failed to parse function arguments: {e:?}"
))
})?;
let result = sess
.session_manager
.handle_write_stdin_request(write_stdin_params)
.await;
let function_call_output: FunctionCallOutputPayload =
crate::exec_command::result_into_payload(result);
ResponseInputItem::FunctionCallOutput {
call_id,
output: function_call_output,
}
}
_ => {
match sess.mcp_connection_manager.parse_tool_name(&name) {
Some((server, tool_name)) => {
handle_mcp_tool_call(sess, &sub_id, call_id, server, tool_name, arguments).await
}
None => {
// Unknown function: reply with structured failure so the model can adapt.
ResponseInputItem::FunctionCallOutput {
call_id,
output: FunctionCallOutputPayload {
content: format!("unsupported call: {name}"),
success: None,
},
}
}
}
.await
.map_err(FunctionCallError::RespondToModel)?;
Ok(result.to_text_output())
}
_ => Err(FunctionCallError::RespondToModel(format!(
"unsupported call: {name}"
))),
}
}
@@ -2606,7 +2567,7 @@ async fn handle_custom_tool_call(
name: String,
input: String,
call_id: String,
) -> ResponseInputItem {
) -> Result<String, FunctionCallError> {
info!("CustomToolCall: {name} {input}");
match name.as_str() {
"apply_patch" => {
@@ -2618,7 +2579,8 @@ async fn handle_custom_tool_call(
with_escalated_permissions: None,
justification: None,
};
let resp = handle_container_exec_with_params(
handle_container_exec_with_params(
exec_params,
sess,
turn_context,
@@ -2626,26 +2588,13 @@ async fn handle_custom_tool_call(
sub_id,
call_id,
)
.await;
// Convert function-call style output into a custom tool call output
match resp {
ResponseInputItem::FunctionCallOutput { call_id, output } => {
ResponseInputItem::CustomToolCallOutput {
call_id,
output: output.content,
}
}
// Pass through if already a custom tool output or other variant
other => other,
}
.await
}
_ => {
debug!("unexpected CustomToolCall from stream");
ResponseInputItem::CustomToolCallOutput {
call_id,
output: format!("unsupported custom tool call: {name}"),
}
Err(FunctionCallError::RespondToModel(format!(
"unsupported custom tool call: {name}"
)))
}
}
}
@@ -2664,23 +2613,13 @@ fn to_exec_params(params: ShellToolCallParams, turn_context: &TurnContext) -> Ex
fn parse_container_exec_arguments(
arguments: String,
turn_context: &TurnContext,
call_id: &str,
) -> Result<ExecParams, Box<ResponseInputItem>> {
// parse command
match serde_json::from_str::<ShellToolCallParams>(&arguments) {
Ok(shell_tool_call_params) => Ok(to_exec_params(shell_tool_call_params, turn_context)),
Err(e) => {
// allow model to re-sample
let output = ResponseInputItem::FunctionCallOutput {
call_id: call_id.to_string(),
output: FunctionCallOutputPayload {
content: format!("failed to parse function arguments: {e}"),
success: None,
},
};
Err(Box::new(output))
}
}
_call_id: &str,
) -> Result<ExecParams, FunctionCallError> {
serde_json::from_str::<ShellToolCallParams>(&arguments)
.map(|p| to_exec_params(p, turn_context))
.map_err(|e| {
FunctionCallError::RespondToModel(format!("failed to parse function arguments: {e:?}"))
})
}
pub struct ExecInvokeArgs<'a> {
@@ -2717,20 +2656,14 @@ async fn handle_container_exec_with_params(
turn_diff_tracker: &mut TurnDiffTracker,
sub_id: String,
call_id: String,
) -> ResponseInputItem {
) -> Result<String, FunctionCallError> {
if params.with_escalated_permissions.unwrap_or(false)
&& !matches!(turn_context.approval_policy, AskForApproval::OnRequest)
{
return ResponseInputItem::FunctionCallOutput {
call_id,
output: FunctionCallOutputPayload {
content: format!(
"approval policy is {policy:?}; reject command — you should not ask for escalated permissions if the approval policy is {policy:?}",
policy = turn_context.approval_policy
),
success: None,
},
};
return Err(FunctionCallError::RespondToModel(format!(
"approval policy is {policy:?}; reject command — you should not ask for escalated permissions if the approval policy is {policy:?}",
policy = turn_context.approval_policy
)));
}
// check if this was a patch, and apply it if so
@@ -2747,13 +2680,9 @@ async fn handle_container_exec_with_params(
// It looks like an invocation of `apply_patch`, but we
// could not resolve it into a patch that would apply
// cleanly. Return to model for resample.
return ResponseInputItem::FunctionCallOutput {
call_id,
output: FunctionCallOutputPayload {
content: format!("error: {parse_error:#}"),
success: None,
},
};
return Err(FunctionCallError::RespondToModel(format!(
"error: {parse_error:#?}"
)));
}
MaybeApplyPatchVerified::ShellParseError(error) => {
trace!("Failed to parse shell command, {error:?}");
@@ -2771,13 +2700,9 @@ async fn handle_container_exec_with_params(
.ok()
.map(|p| p.to_string_lossy().to_string());
let Some(path_to_codex) = path_to_codex else {
return ResponseInputItem::FunctionCallOutput {
call_id,
output: FunctionCallOutputPayload {
content: "failed to determine path to codex executable".to_string(),
success: None,
},
};
return Err(FunctionCallError::RespondToModel(
"failed to determine path to codex executable".to_string(),
));
};
let params = ExecParams {
@@ -2843,13 +2768,9 @@ async fn handle_container_exec_with_params(
sess.add_approved_command(params.command.clone()).await;
}
ReviewDecision::Denied | ReviewDecision::Abort => {
return ResponseInputItem::FunctionCallOutput {
call_id,
output: FunctionCallOutputPayload {
content: "exec command rejected by user".to_string(),
success: None,
},
};
return Err(FunctionCallError::RespondToModel(
"exec command rejected by user".to_string(),
));
}
}
// No sandboxing is applied because the user has given
@@ -2859,13 +2780,9 @@ async fn handle_container_exec_with_params(
SandboxType::None
}
SafetyCheck::Reject { reason } => {
return ResponseInputItem::FunctionCallOutput {
call_id,
output: FunctionCallOutputPayload {
content: format!("exec command rejected: {reason}"),
success: None,
},
};
return Err(FunctionCallError::RespondToModel(format!(
"exec command rejected: {reason:?}"
)));
}
};
@@ -2912,15 +2829,11 @@ async fn handle_container_exec_with_params(
match output_result {
Ok(output) => {
let ExecToolCallOutput { exit_code, .. } = &output;
let is_success = *exit_code == 0;
let content = format_exec_output(&output);
ResponseInputItem::FunctionCallOutput {
call_id: call_id.clone(),
output: FunctionCallOutputPayload {
content,
success: Some(is_success),
},
if *exit_code == 0 {
Ok(content)
} else {
Err(FunctionCallError::RespondToModel(content))
}
}
Err(CodexErr::Sandbox(error)) => {
@@ -2935,13 +2848,9 @@ async fn handle_container_exec_with_params(
)
.await
}
Err(e) => ResponseInputItem::FunctionCallOutput {
call_id: call_id.clone(),
output: FunctionCallOutputPayload {
content: format!("execution error: {e}"),
success: None,
},
},
Err(e) => Err(FunctionCallError::RespondToModel(format!(
"execution error: {e:?}"
))),
}
}
@@ -2953,35 +2862,23 @@ async fn handle_sandbox_error(
sandbox_type: SandboxType,
sess: &Session,
turn_context: &TurnContext,
) -> ResponseInputItem {
) -> Result<String, FunctionCallError> {
let call_id = exec_command_context.call_id.clone();
let sub_id = exec_command_context.sub_id.clone();
let cwd = exec_command_context.cwd.clone();
if let SandboxErr::Timeout { output } = &error {
let content = format_exec_output(output);
return ResponseInputItem::FunctionCallOutput {
call_id,
output: FunctionCallOutputPayload {
content,
success: Some(false),
},
};
return Err(FunctionCallError::RespondToModel(content));
}
// Early out if either the user never wants to be asked for approval, or
// we're letting the model manage escalation requests. Otherwise, continue
match turn_context.approval_policy {
AskForApproval::Never | AskForApproval::OnRequest => {
return ResponseInputItem::FunctionCallOutput {
call_id,
output: FunctionCallOutputPayload {
content: format!(
"failed in sandbox {sandbox_type:?} with execution error: {error}"
),
success: Some(false),
},
};
return Err(FunctionCallError::RespondToModel(format!(
"failed in sandbox {sandbox_type:?} with execution error: {error:?}"
)));
}
AskForApproval::UnlessTrusted | AskForApproval::OnFailure => (),
}
@@ -3047,36 +2944,23 @@ async fn handle_sandbox_error(
match retry_output_result {
Ok(retry_output) => {
let ExecToolCallOutput { exit_code, .. } = &retry_output;
let is_success = *exit_code == 0;
let content = format_exec_output(&retry_output);
ResponseInputItem::FunctionCallOutput {
call_id: call_id.clone(),
output: FunctionCallOutputPayload {
content,
success: Some(is_success),
},
if *exit_code == 0 {
Ok(content)
} else {
Err(FunctionCallError::RespondToModel(content))
}
}
Err(e) => ResponseInputItem::FunctionCallOutput {
call_id: call_id.clone(),
output: FunctionCallOutputPayload {
content: format!("retry failed: {e}"),
success: None,
},
},
Err(e) => Err(FunctionCallError::RespondToModel(format!(
"retry failed: {e}"
))),
}
}
ReviewDecision::Denied | ReviewDecision::Abort => {
// Fall through to original failure handling.
ResponseInputItem::FunctionCallOutput {
call_id,
output: FunctionCallOutputPayload {
content: "exec command rejected by user".to_string(),
success: None,
},
}
Err(FunctionCallError::RespondToModel(
"exec command rejected by user".to_string(),
))
}
}
}
@@ -3798,8 +3682,8 @@ mod tests {
)
.await;
let ResponseInputItem::FunctionCallOutput { output, .. } = resp else {
panic!("expected FunctionCallOutput");
let Err(FunctionCallError::RespondToModel(output)) = resp else {
panic!("expected error result");
};
let expected = format!(
@@ -3807,7 +3691,7 @@ mod tests {
policy = turn_context.approval_policy
);
pretty_assertions::assert_eq!(output.content, expected);
pretty_assertions::assert_eq!(output, expected);
// Now retry the same command WITHOUT escalated permissions; should succeed.
// Force DangerFullAccess to avoid platform sandbox dependencies in tests.
@@ -3823,9 +3707,7 @@ mod tests {
)
.await;
let ResponseInputItem::FunctionCallOutput { output, .. } = resp2 else {
panic!("expected FunctionCallOutput on retry");
};
let output = resp2.expect("expected Ok result");
#[derive(Deserialize, PartialEq, Eq, Debug)]
struct ResponseExecMetadata {
@@ -3839,10 +3721,9 @@ mod tests {
}
let exec_output: ResponseExecOutput =
serde_json::from_str(&output.content).expect("valid exec output json");
serde_json::from_str(&output).expect("valid exec output json");
pretty_assertions::assert_eq!(exec_output.metadata, ResponseExecMetadata { exit_code: 0 });
assert!(exec_output.output.contains("hi"));
pretty_assertions::assert_eq!(output.success, Some(true));
}
}