feat: grep_files as a tool (#4820)

Add `grep_files` to be able to perform more action in parallel
This commit is contained in:
jif-oai
2025-10-08 11:02:50 +01:00
committed by GitHub
parent a43ae86b6c
commit f52320be86
7 changed files with 591 additions and 9 deletions

View File

@@ -0,0 +1,272 @@
use std::path::Path;
use std::time::Duration;
use async_trait::async_trait;
use serde::Deserialize;
use tokio::process::Command;
use tokio::time::timeout;
use crate::function_tool::FunctionCallError;
use crate::tools::context::ToolInvocation;
use crate::tools::context::ToolOutput;
use crate::tools::context::ToolPayload;
use crate::tools::registry::ToolHandler;
use crate::tools::registry::ToolKind;
pub struct GrepFilesHandler;
const DEFAULT_LIMIT: usize = 100;
const MAX_LIMIT: usize = 2000;
const COMMAND_TIMEOUT: Duration = Duration::from_secs(30);
fn default_limit() -> usize {
DEFAULT_LIMIT
}
#[derive(Deserialize)]
struct GrepFilesArgs {
pattern: String,
#[serde(default)]
include: Option<String>,
#[serde(default)]
path: Option<String>,
#[serde(default = "default_limit")]
limit: usize,
}
#[async_trait]
impl ToolHandler for GrepFilesHandler {
fn kind(&self) -> ToolKind {
ToolKind::Function
}
async fn handle(&self, invocation: ToolInvocation) -> Result<ToolOutput, FunctionCallError> {
let ToolInvocation { payload, turn, .. } = invocation;
let arguments = match payload {
ToolPayload::Function { arguments } => arguments,
_ => {
return Err(FunctionCallError::RespondToModel(
"grep_files handler received unsupported payload".to_string(),
));
}
};
let args: GrepFilesArgs = serde_json::from_str(&arguments).map_err(|err| {
FunctionCallError::RespondToModel(format!(
"failed to parse function arguments: {err:?}"
))
})?;
let pattern = args.pattern.trim();
if pattern.is_empty() {
return Err(FunctionCallError::RespondToModel(
"pattern must not be empty".to_string(),
));
}
if args.limit == 0 {
return Err(FunctionCallError::RespondToModel(
"limit must be greater than zero".to_string(),
));
}
let limit = args.limit.min(MAX_LIMIT);
let search_path = turn.resolve_path(args.path.clone());
verify_path_exists(&search_path).await?;
let include = args.include.as_deref().map(str::trim).and_then(|val| {
if val.is_empty() {
None
} else {
Some(val.to_string())
}
});
let search_results =
run_rg_search(pattern, include.as_deref(), &search_path, limit, &turn.cwd).await?;
if search_results.is_empty() {
Ok(ToolOutput::Function {
content: "No matches found.".to_string(),
success: Some(false),
})
} else {
Ok(ToolOutput::Function {
content: search_results.join("\n"),
success: Some(true),
})
}
}
}
async fn verify_path_exists(path: &Path) -> Result<(), FunctionCallError> {
tokio::fs::metadata(path).await.map_err(|err| {
FunctionCallError::RespondToModel(format!("unable to access `{}`: {err}", path.display()))
})?;
Ok(())
}
async fn run_rg_search(
pattern: &str,
include: Option<&str>,
search_path: &Path,
limit: usize,
cwd: &Path,
) -> Result<Vec<String>, FunctionCallError> {
let mut command = Command::new("rg");
command
.current_dir(cwd)
.arg("--files-with-matches")
.arg("--sortr=modified")
.arg("--regexp")
.arg(pattern)
.arg("--no-messages");
if let Some(glob) = include {
command.arg("--glob").arg(glob);
}
command.arg("--").arg(search_path);
let output = timeout(COMMAND_TIMEOUT, command.output())
.await
.map_err(|_| {
FunctionCallError::RespondToModel("rg timed out after 30 seconds".to_string())
})?
.map_err(|err| {
FunctionCallError::RespondToModel(format!(
"failed to launch rg: {err}. Ensure ripgrep is installed and on PATH."
))
})?;
match output.status.code() {
Some(0) => Ok(parse_results(&output.stdout, limit)),
Some(1) => Ok(Vec::new()),
_ => {
let stderr = String::from_utf8_lossy(&output.stderr);
Err(FunctionCallError::RespondToModel(format!(
"rg failed: {stderr}"
)))
}
}
}
fn parse_results(stdout: &[u8], limit: usize) -> Vec<String> {
let mut results = Vec::new();
for line in stdout.split(|byte| *byte == b'\n') {
if line.is_empty() {
continue;
}
if let Ok(text) = std::str::from_utf8(line) {
if text.is_empty() {
continue;
}
results.push(text.to_string());
if results.len() == limit {
break;
}
}
}
results
}
#[cfg(test)]
mod tests {
use super::*;
use std::process::Command as StdCommand;
use tempfile::tempdir;
#[test]
fn parses_basic_results() {
let stdout = b"/tmp/file_a.rs\n/tmp/file_b.rs\n";
let parsed = parse_results(stdout, 10);
assert_eq!(
parsed,
vec!["/tmp/file_a.rs".to_string(), "/tmp/file_b.rs".to_string()]
);
}
#[test]
fn parse_truncates_after_limit() {
let stdout = b"/tmp/file_a.rs\n/tmp/file_b.rs\n/tmp/file_c.rs\n";
let parsed = parse_results(stdout, 2);
assert_eq!(
parsed,
vec!["/tmp/file_a.rs".to_string(), "/tmp/file_b.rs".to_string()]
);
}
#[tokio::test]
async fn run_search_returns_results() -> anyhow::Result<()> {
if !rg_available() {
return Ok(());
}
let temp = tempdir().expect("create temp dir");
let dir = temp.path();
std::fs::write(dir.join("match_one.txt"), "alpha beta gamma").unwrap();
std::fs::write(dir.join("match_two.txt"), "alpha delta").unwrap();
std::fs::write(dir.join("other.txt"), "omega").unwrap();
let results = run_rg_search("alpha", None, dir, 10, dir).await?;
assert_eq!(results.len(), 2);
assert!(results.iter().any(|path| path.ends_with("match_one.txt")));
assert!(results.iter().any(|path| path.ends_with("match_two.txt")));
Ok(())
}
#[tokio::test]
async fn run_search_with_glob_filter() -> anyhow::Result<()> {
if !rg_available() {
return Ok(());
}
let temp = tempdir().expect("create temp dir");
let dir = temp.path();
std::fs::write(dir.join("match_one.rs"), "alpha beta gamma").unwrap();
std::fs::write(dir.join("match_two.txt"), "alpha delta").unwrap();
let results = run_rg_search("alpha", Some("*.rs"), dir, 10, dir).await?;
assert_eq!(results.len(), 1);
assert!(results.iter().all(|path| path.ends_with("match_one.rs")));
Ok(())
}
#[tokio::test]
async fn run_search_respects_limit() -> anyhow::Result<()> {
if !rg_available() {
return Ok(());
}
let temp = tempdir().expect("create temp dir");
let dir = temp.path();
std::fs::write(dir.join("one.txt"), "alpha one").unwrap();
std::fs::write(dir.join("two.txt"), "alpha two").unwrap();
std::fs::write(dir.join("three.txt"), "alpha three").unwrap();
let results = run_rg_search("alpha", None, dir, 2, dir).await?;
assert_eq!(results.len(), 2);
Ok(())
}
#[tokio::test]
async fn run_search_handles_no_matches() -> anyhow::Result<()> {
if !rg_available() {
return Ok(());
}
let temp = tempdir().expect("create temp dir");
let dir = temp.path();
std::fs::write(dir.join("one.txt"), "omega").unwrap();
let results = run_rg_search("alpha", None, dir, 5, dir).await?;
assert!(results.is_empty());
Ok(())
}
fn rg_available() -> bool {
StdCommand::new("rg")
.arg("--version")
.output()
.map(|output| output.status.success())
.unwrap_or(false)
}
}

View File

@@ -1,5 +1,6 @@
pub mod apply_patch;
mod exec_stream;
mod grep_files;
mod list_dir;
mod mcp;
mod plan;
@@ -13,6 +14,7 @@ pub use plan::PLAN_TOOL;
pub use apply_patch::ApplyPatchHandler;
pub use exec_stream::ExecStreamHandler;
pub use grep_files::GrepFilesHandler;
pub use list_dir::ListDirHandler;
pub use mcp::McpHandler;
pub use plan::PlanHandler;

View File

@@ -320,6 +320,56 @@ fn create_test_sync_tool() -> ToolSpec {
})
}
fn create_grep_files_tool() -> ToolSpec {
let mut properties = BTreeMap::new();
properties.insert(
"pattern".to_string(),
JsonSchema::String {
description: Some("Regular expression pattern to search for.".to_string()),
},
);
properties.insert(
"include".to_string(),
JsonSchema::String {
description: Some(
"Optional glob that limits which files are searched (e.g. \"*.rs\" or \
\"*.{ts,tsx}\")."
.to_string(),
),
},
);
properties.insert(
"path".to_string(),
JsonSchema::String {
description: Some(
"Directory or file path to search. Defaults to the session's working directory."
.to_string(),
),
},
);
properties.insert(
"limit".to_string(),
JsonSchema::Number {
description: Some(
"Maximum number of file paths to return (defaults to 100).".to_string(),
),
},
);
ToolSpec::Function(ResponsesApiTool {
name: "grep_files".to_string(),
description: "Finds files whose contents match the pattern and lists them by modification \
time."
.to_string(),
strict: false,
parameters: JsonSchema::Object {
properties,
required: Some(vec!["pattern".to_string()]),
additional_properties: Some(false.into()),
},
})
}
fn create_read_file_tool() -> ToolSpec {
let mut properties = BTreeMap::new();
properties.insert(
@@ -610,6 +660,7 @@ pub(crate) fn build_specs(
use crate::exec_command::create_write_stdin_tool_for_responses_api;
use crate::tools::handlers::ApplyPatchHandler;
use crate::tools::handlers::ExecStreamHandler;
use crate::tools::handlers::GrepFilesHandler;
use crate::tools::handlers::ListDirHandler;
use crate::tools::handlers::McpHandler;
use crate::tools::handlers::PlanHandler;
@@ -678,8 +729,16 @@ pub(crate) fn build_specs(
if config
.experimental_supported_tools
.iter()
.any(|tool| tool == "read_file")
.contains(&"grep_files".to_string())
{
let grep_files_handler = Arc::new(GrepFilesHandler);
builder.push_spec_with_parallel_support(create_grep_files_tool(), true);
builder.register_handler("grep_files", grep_files_handler);
}
if config
.experimental_supported_tools
.contains(&"read_file".to_string())
{
let read_file_handler = Arc::new(ReadFileHandler);
builder.push_spec_with_parallel_support(create_read_file_tool(), true);
@@ -698,8 +757,7 @@ pub(crate) fn build_specs(
if config
.experimental_supported_tools
.iter()
.any(|tool| tool == "test_sync_tool")
.contains(&"test_sync_tool".to_string())
{
let test_sync_handler = Arc::new(TestSyncHandler);
builder.push_spec_with_parallel_support(create_test_sync_tool(), true);
@@ -841,8 +899,9 @@ mod tests {
let (tools, _) = build_specs(&config, None).build();
assert!(!find_tool(&tools, "unified_exec").supports_parallel_tool_calls);
assert!(find_tool(&tools, "read_file").supports_parallel_tool_calls);
assert!(find_tool(&tools, "grep_files").supports_parallel_tool_calls);
assert!(find_tool(&tools, "list_dir").supports_parallel_tool_calls);
assert!(find_tool(&tools, "read_file").supports_parallel_tool_calls);
}
#[test]
@@ -870,6 +929,11 @@ mod tests {
.iter()
.any(|tool| tool_name(&tool.spec) == "read_file")
);
assert!(
tools
.iter()
.any(|tool| tool_name(&tool.spec) == "grep_files")
);
assert!(tools.iter().any(|tool| tool_name(&tool.spec) == "list_dir"));
}