feat: grep_files as a tool (#4820)
Add `grep_files` to be able to perform more action in parallel
This commit is contained in:
272
codex-rs/core/src/tools/handlers/grep_files.rs
Normal file
272
codex-rs/core/src/tools/handlers/grep_files.rs
Normal file
@@ -0,0 +1,272 @@
|
||||
use std::path::Path;
|
||||
use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use serde::Deserialize;
|
||||
use tokio::process::Command;
|
||||
use tokio::time::timeout;
|
||||
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::tools::context::ToolInvocation;
|
||||
use crate::tools::context::ToolOutput;
|
||||
use crate::tools::context::ToolPayload;
|
||||
use crate::tools::registry::ToolHandler;
|
||||
use crate::tools::registry::ToolKind;
|
||||
|
||||
pub struct GrepFilesHandler;
|
||||
|
||||
const DEFAULT_LIMIT: usize = 100;
|
||||
const MAX_LIMIT: usize = 2000;
|
||||
const COMMAND_TIMEOUT: Duration = Duration::from_secs(30);
|
||||
|
||||
fn default_limit() -> usize {
|
||||
DEFAULT_LIMIT
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct GrepFilesArgs {
|
||||
pattern: String,
|
||||
#[serde(default)]
|
||||
include: Option<String>,
|
||||
#[serde(default)]
|
||||
path: Option<String>,
|
||||
#[serde(default = "default_limit")]
|
||||
limit: usize,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl ToolHandler for GrepFilesHandler {
|
||||
fn kind(&self) -> ToolKind {
|
||||
ToolKind::Function
|
||||
}
|
||||
|
||||
async fn handle(&self, invocation: ToolInvocation) -> Result<ToolOutput, FunctionCallError> {
|
||||
let ToolInvocation { payload, turn, .. } = invocation;
|
||||
|
||||
let arguments = match payload {
|
||||
ToolPayload::Function { arguments } => arguments,
|
||||
_ => {
|
||||
return Err(FunctionCallError::RespondToModel(
|
||||
"grep_files handler received unsupported payload".to_string(),
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
let args: GrepFilesArgs = serde_json::from_str(&arguments).map_err(|err| {
|
||||
FunctionCallError::RespondToModel(format!(
|
||||
"failed to parse function arguments: {err:?}"
|
||||
))
|
||||
})?;
|
||||
|
||||
let pattern = args.pattern.trim();
|
||||
if pattern.is_empty() {
|
||||
return Err(FunctionCallError::RespondToModel(
|
||||
"pattern must not be empty".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if args.limit == 0 {
|
||||
return Err(FunctionCallError::RespondToModel(
|
||||
"limit must be greater than zero".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
let limit = args.limit.min(MAX_LIMIT);
|
||||
let search_path = turn.resolve_path(args.path.clone());
|
||||
|
||||
verify_path_exists(&search_path).await?;
|
||||
|
||||
let include = args.include.as_deref().map(str::trim).and_then(|val| {
|
||||
if val.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(val.to_string())
|
||||
}
|
||||
});
|
||||
|
||||
let search_results =
|
||||
run_rg_search(pattern, include.as_deref(), &search_path, limit, &turn.cwd).await?;
|
||||
|
||||
if search_results.is_empty() {
|
||||
Ok(ToolOutput::Function {
|
||||
content: "No matches found.".to_string(),
|
||||
success: Some(false),
|
||||
})
|
||||
} else {
|
||||
Ok(ToolOutput::Function {
|
||||
content: search_results.join("\n"),
|
||||
success: Some(true),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn verify_path_exists(path: &Path) -> Result<(), FunctionCallError> {
|
||||
tokio::fs::metadata(path).await.map_err(|err| {
|
||||
FunctionCallError::RespondToModel(format!("unable to access `{}`: {err}", path.display()))
|
||||
})?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn run_rg_search(
|
||||
pattern: &str,
|
||||
include: Option<&str>,
|
||||
search_path: &Path,
|
||||
limit: usize,
|
||||
cwd: &Path,
|
||||
) -> Result<Vec<String>, FunctionCallError> {
|
||||
let mut command = Command::new("rg");
|
||||
command
|
||||
.current_dir(cwd)
|
||||
.arg("--files-with-matches")
|
||||
.arg("--sortr=modified")
|
||||
.arg("--regexp")
|
||||
.arg(pattern)
|
||||
.arg("--no-messages");
|
||||
|
||||
if let Some(glob) = include {
|
||||
command.arg("--glob").arg(glob);
|
||||
}
|
||||
|
||||
command.arg("--").arg(search_path);
|
||||
|
||||
let output = timeout(COMMAND_TIMEOUT, command.output())
|
||||
.await
|
||||
.map_err(|_| {
|
||||
FunctionCallError::RespondToModel("rg timed out after 30 seconds".to_string())
|
||||
})?
|
||||
.map_err(|err| {
|
||||
FunctionCallError::RespondToModel(format!(
|
||||
"failed to launch rg: {err}. Ensure ripgrep is installed and on PATH."
|
||||
))
|
||||
})?;
|
||||
|
||||
match output.status.code() {
|
||||
Some(0) => Ok(parse_results(&output.stdout, limit)),
|
||||
Some(1) => Ok(Vec::new()),
|
||||
_ => {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
Err(FunctionCallError::RespondToModel(format!(
|
||||
"rg failed: {stderr}"
|
||||
)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_results(stdout: &[u8], limit: usize) -> Vec<String> {
|
||||
let mut results = Vec::new();
|
||||
for line in stdout.split(|byte| *byte == b'\n') {
|
||||
if line.is_empty() {
|
||||
continue;
|
||||
}
|
||||
if let Ok(text) = std::str::from_utf8(line) {
|
||||
if text.is_empty() {
|
||||
continue;
|
||||
}
|
||||
results.push(text.to_string());
|
||||
if results.len() == limit {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
results
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::process::Command as StdCommand;
|
||||
use tempfile::tempdir;
|
||||
|
||||
#[test]
|
||||
fn parses_basic_results() {
|
||||
let stdout = b"/tmp/file_a.rs\n/tmp/file_b.rs\n";
|
||||
let parsed = parse_results(stdout, 10);
|
||||
assert_eq!(
|
||||
parsed,
|
||||
vec!["/tmp/file_a.rs".to_string(), "/tmp/file_b.rs".to_string()]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_truncates_after_limit() {
|
||||
let stdout = b"/tmp/file_a.rs\n/tmp/file_b.rs\n/tmp/file_c.rs\n";
|
||||
let parsed = parse_results(stdout, 2);
|
||||
assert_eq!(
|
||||
parsed,
|
||||
vec!["/tmp/file_a.rs".to_string(), "/tmp/file_b.rs".to_string()]
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn run_search_returns_results() -> anyhow::Result<()> {
|
||||
if !rg_available() {
|
||||
return Ok(());
|
||||
}
|
||||
let temp = tempdir().expect("create temp dir");
|
||||
let dir = temp.path();
|
||||
std::fs::write(dir.join("match_one.txt"), "alpha beta gamma").unwrap();
|
||||
std::fs::write(dir.join("match_two.txt"), "alpha delta").unwrap();
|
||||
std::fs::write(dir.join("other.txt"), "omega").unwrap();
|
||||
|
||||
let results = run_rg_search("alpha", None, dir, 10, dir).await?;
|
||||
assert_eq!(results.len(), 2);
|
||||
assert!(results.iter().any(|path| path.ends_with("match_one.txt")));
|
||||
assert!(results.iter().any(|path| path.ends_with("match_two.txt")));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn run_search_with_glob_filter() -> anyhow::Result<()> {
|
||||
if !rg_available() {
|
||||
return Ok(());
|
||||
}
|
||||
let temp = tempdir().expect("create temp dir");
|
||||
let dir = temp.path();
|
||||
std::fs::write(dir.join("match_one.rs"), "alpha beta gamma").unwrap();
|
||||
std::fs::write(dir.join("match_two.txt"), "alpha delta").unwrap();
|
||||
|
||||
let results = run_rg_search("alpha", Some("*.rs"), dir, 10, dir).await?;
|
||||
assert_eq!(results.len(), 1);
|
||||
assert!(results.iter().all(|path| path.ends_with("match_one.rs")));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn run_search_respects_limit() -> anyhow::Result<()> {
|
||||
if !rg_available() {
|
||||
return Ok(());
|
||||
}
|
||||
let temp = tempdir().expect("create temp dir");
|
||||
let dir = temp.path();
|
||||
std::fs::write(dir.join("one.txt"), "alpha one").unwrap();
|
||||
std::fs::write(dir.join("two.txt"), "alpha two").unwrap();
|
||||
std::fs::write(dir.join("three.txt"), "alpha three").unwrap();
|
||||
|
||||
let results = run_rg_search("alpha", None, dir, 2, dir).await?;
|
||||
assert_eq!(results.len(), 2);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn run_search_handles_no_matches() -> anyhow::Result<()> {
|
||||
if !rg_available() {
|
||||
return Ok(());
|
||||
}
|
||||
let temp = tempdir().expect("create temp dir");
|
||||
let dir = temp.path();
|
||||
std::fs::write(dir.join("one.txt"), "omega").unwrap();
|
||||
|
||||
let results = run_rg_search("alpha", None, dir, 5, dir).await?;
|
||||
assert!(results.is_empty());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn rg_available() -> bool {
|
||||
StdCommand::new("rg")
|
||||
.arg("--version")
|
||||
.output()
|
||||
.map(|output| output.status.success())
|
||||
.unwrap_or(false)
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,6 @@
|
||||
pub mod apply_patch;
|
||||
mod exec_stream;
|
||||
mod grep_files;
|
||||
mod list_dir;
|
||||
mod mcp;
|
||||
mod plan;
|
||||
@@ -13,6 +14,7 @@ pub use plan::PLAN_TOOL;
|
||||
|
||||
pub use apply_patch::ApplyPatchHandler;
|
||||
pub use exec_stream::ExecStreamHandler;
|
||||
pub use grep_files::GrepFilesHandler;
|
||||
pub use list_dir::ListDirHandler;
|
||||
pub use mcp::McpHandler;
|
||||
pub use plan::PlanHandler;
|
||||
|
||||
Reference in New Issue
Block a user