Compare commits

...

6 Commits

Author | SHA1 | Message | Date
Richard Feldman | c415009e4f | Handle long lines in regex search | 2025-03-18 11:21:36 -04:00
Richard Feldman | 2c2bce478d | Fix offset behavior in regex search | 2025-03-18 11:21:36 -04:00
Richard Feldman | e26e4ee693 | Use 20 results per page on regex search | 2025-03-18 11:21:36 -04:00
Richard Feldman | 8144c938d3 | Change read file tool to use line numbers too | 2025-03-18 11:21:36 -04:00
Richard Feldman | e53221f1f9 | Show line numbers in regex tool results | 2025-03-18 11:21:36 -04:00
Richard Feldman | 38f1d14876 | Paginate path search | 2025-03-18 11:21:36 -04:00
5 changed files with 206 additions and 29 deletions

View File

@@ -23,8 +23,15 @@ pub struct PathSearchToolInput {
/// You can get back the first two paths by providing a glob of "*thing*.txt"
/// </example>
pub glob: String,
/// Optional starting position for paginated results (0-based).
/// When not provided, starts from the beginning.
#[serde(default)]
pub offset: Option<usize>,
}
const RESULTS_PER_PAGE: usize = 50;
pub struct PathSearchTool;
impl Tool for PathSearchTool {
@@ -49,8 +56,8 @@ impl Tool for PathSearchTool {
_action_log: Entity<ActionLog>,
cx: &mut App,
) -> Task<Result<String>> {
let glob = match serde_json::from_value::<PathSearchToolInput>(input) {
Ok(input) => input.glob,
let (offset, glob) = match serde_json::from_value::<PathSearchToolInput>(input) {
Ok(input) => (input.offset.unwrap_or(0), input.glob),
Err(err) => return Task::ready(Err(anyhow!(err))),
};
let path_matcher = match PathMatcher::new(&[glob.clone()]) {
@@ -87,7 +94,27 @@ impl Tool for PathSearchTool {
} else {
// Sort to group entries in the same directory together.
matches.sort();
Ok(matches.join("\n"))
let total_matches = matches.len();
let response = if total_matches > offset + RESULTS_PER_PAGE {
let paginated_matches: Vec<_> = matches
.into_iter()
.skip(offset)
.take(RESULTS_PER_PAGE)
.collect();
format!(
"Found {} total matches. Showing results {}-{} (provide 'offset' parameter for more results):\n\n{}",
total_matches,
offset + 1,
offset + paginated_matches.len(),
paginated_matches.join("\n")
)
} else {
matches.join("\n")
};
Ok(response)
}
})
}
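
The path search change collects all matches, sorts them, then slices out one page of 50 with skip/take, adding the "Found N total matches" header only when more results remain past the current page. A minimal standalone sketch of that slicing (the `paginate` helper and the sample paths are illustrative, not part of the commit):

```rust
const RESULTS_PER_PAGE: usize = 50;

/// Returns the page of matches starting at `offset`, plus whether more pages remain.
fn paginate(matches: Vec<String>, offset: usize) -> (Vec<String>, bool) {
    let total = matches.len();
    let page: Vec<String> = matches
        .into_iter()
        .skip(offset)
        .take(RESULTS_PER_PAGE)
        .collect();
    (page, total > offset + RESULTS_PER_PAGE)
}

fn main() {
    let matches: Vec<String> = (0..120).map(|i| format!("src/file_{i}.rs")).collect();
    let (page, has_more) = paginate(matches, 50);
    // Second page: results 51-100 of 120, with one more page remaining.
    assert_eq!(page.len(), 50);
    assert!(has_more);
    println!("showing {} results, more: {}", page.len(), has_more);
}
```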

View File

@@ -1 +1,3 @@
Returns all the paths in the project which match the given glob.
Returns paths in the project which match the given glob.
Results are paginated with 50 matches per page. Use the optional 'offset' parameter to request subsequent pages.
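
For callers, `offset` is optional and omitting it starts from the first match. A hedged sketch of how the input JSON deserializes, using a trimmed stand-in for `PathSearchToolInput` with the same field attributes as the diff:

```rust
use serde::Deserialize;

// Trimmed stand-in for the tool's input struct; field names match the diff.
#[derive(Deserialize)]
struct PathSearchToolInput {
    glob: String,
    #[serde(default)]
    offset: Option<usize>,
}

fn main() -> Result<(), serde_json::Error> {
    // First page: no offset provided, so the tool starts from the beginning.
    let first: PathSearchToolInput = serde_json::from_str(r#"{ "glob": "*thing*.txt" }"#)?;
    assert_eq!(first.offset.unwrap_or(0), 0);

    // Next page: ask for results starting at match 50.
    let next: PathSearchToolInput =
        serde_json::from_str(r#"{ "glob": "*thing*.txt", "offset": 50 }"#)?;
    assert_eq!(next.offset, Some(50));
    println!("glob: {}", next.glob);
    Ok(())
}
```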

View File

@@ -28,13 +28,13 @@ pub struct ReadFileToolInput {
/// </example>
pub path: Arc<Path>,
/// Optional line number to start reading from (0-based index)
/// Optional line number to start reading on (1-based index)
#[serde(default)]
pub start_line: Option<usize>,
/// Optional number of lines to read
/// Optional line number to end reading on (1-based index)
#[serde(default)]
pub line_count: Option<usize>,
pub end_line: Option<usize>,
}
pub struct ReadFileTool;
@@ -83,10 +83,12 @@ impl Tool for ReadFileTool {
.map_or(false, |file| file.disk_state().exists())
{
let text = buffer.text();
let string = if input.start_line.is_some() || input.line_count.is_some() {
let lines = text.split('\n').skip(input.start_line.unwrap_or(0));
if let Some(line_count) = input.line_count {
Itertools::intersperse(lines.take(line_count), "\n").collect()
let string = if input.start_line.is_some() || input.end_line.is_some() {
let start = input.start_line.unwrap_or(1);
let lines = text.split('\n').skip(start - 1);
if let Some(end) = input.end_line {
let count = end.saturating_sub(start);
Itertools::intersperse(lines.take(count), "\n").collect()
} else {
Itertools::intersperse(lines, "\n").collect()
}
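
The read tool now takes 1-based `start_line` and `end_line` instead of a 0-based start plus a line count. A minimal standalone sketch of slicing text by such a range, assuming `end_line` is inclusive, hence the `+ 1` in the take count (the `read_lines` helper and sample text are illustrative, not from the commit):

```rust
use itertools::Itertools;

/// Returns the 1-based, inclusive range of lines [start_line, end_line] from `text`.
/// With no end line, reads through to the end of the text.
fn read_lines(text: &str, start_line: Option<usize>, end_line: Option<usize>) -> String {
    let start = start_line.unwrap_or(1).max(1);
    let lines = text.split('\n').skip(start - 1);
    match end_line {
        // Inclusive end line under this sketch's assumption, hence the + 1.
        Some(end) => {
            Itertools::intersperse(lines.take(end.saturating_sub(start) + 1), "\n").collect()
        }
        None => Itertools::intersperse(lines, "\n").collect(),
    }
}

fn main() {
    let text = "line 1\nline 2\nline 3\nline 4";
    assert_eq!(read_lines(text, Some(2), Some(3)), "line 2\nline 3");
    assert_eq!(read_lines(text, Some(3), None), "line 3\nline 4");
}
```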

View File

@@ -2,9 +2,43 @@ use anyhow::{anyhow, Result};
use assistant_tool::{ActionLog, Tool};
use futures::StreamExt;
use gpui::{App, Entity, Task};
use language::OffsetRangeExt;
use language::{OffsetRangeExt, Point};
fn matches_regex(text: String, pattern: &str) -> bool {
// Safely check if pattern exists in text
if pattern.is_empty() {
return false;
}
text.contains(pattern)
}
fn find_matches(text: String, pattern: &str) -> Vec<(usize, usize)> {
let mut matches = Vec::new();
if pattern.is_empty() {
return matches;
}
let mut start = 0;
while start < text.len() {
match text[start..].find(pattern) {
Some(pos) => {
let match_start = start + pos;
let match_end = match_start + pattern.len();
if match_end <= text.len() {
matches.push((match_start, match_end));
}
start = match_start + 1;
}
None => break,
}
}
matches
}
use language_model::LanguageModelRequestMessage;
use project::{search::SearchQuery, Project};
use project::{
search::{SearchQuery, SearchResult},
Project,
};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use std::{cmp, fmt::Write, sync::Arc};
@@ -15,8 +49,17 @@ pub struct RegexSearchToolInput {
/// A regex pattern to search for in the entire project. Note that the regex
/// will be parsed by the Rust `regex` crate.
pub regex: String,
/// Optional starting position for paginated results (0-based).
/// When not provided, starts from the beginning.
#[serde(default)]
pub offset: Option<usize>,
}
const RESULTS_PER_PAGE: usize = 20;
const MAX_LINE_LENGTH: usize = 240;
const LONG_LINE_CONTEXT: usize = 120;
pub struct RegexSearchTool;
impl Tool for RegexSearchTool {
@@ -41,15 +84,17 @@ impl Tool for RegexSearchTool {
_action_log: Entity<ActionLog>,
cx: &mut App,
) -> Task<Result<String>> {
const CONTEXT_LINES: u32 = 2;
const CONTEXT_LINES: usize = 2;
let input = match serde_json::from_value::<RegexSearchToolInput>(input) {
Ok(input) => input,
Err(err) => return Task::ready(Err(anyhow!(err))),
};
let offset = input.offset.unwrap_or(0);
let regex_str = input.regex;
let query = match SearchQuery::regex(
&input.regex,
&regex_str,
false,
false,
false,
@@ -62,30 +107,33 @@ impl Tool for RegexSearchTool {
};
let results = project.update(cx, |project, cx| project.search(query, cx));
cx.spawn(|cx| async move {
futures::pin_mut!(results);
let mut output = String::new();
while let Some(project::search::SearchResult::Buffer { buffer, ranges }) =
results.next().await
{
let mut skips_remaining = offset;
let mut matches_found = 0;
let mut has_more_matches = false;
while let Some(SearchResult::Buffer { buffer, ranges }) = results.next().await {
if ranges.is_empty() {
continue;
}
buffer.read_with(&cx, |buffer, cx| {
buffer.read_with(&cx, |buffer, cx| -> Result<(), anyhow::Error> {
if let Some(path) = buffer.file().map(|file| file.full_path(cx)) {
writeln!(output, "### Found matches in {}:\n", path.display()).unwrap();
let mut file_header_written = false;
let mut ranges = ranges
.into_iter()
.map(|range| {
let mut point_range = range.to_point(buffer);
point_range.start.row =
point_range.start.row.saturating_sub(CONTEXT_LINES);
let context_lines_u32 = CONTEXT_LINES as u32;
point_range.start.row = point_range.start.row.saturating_sub(context_lines_u32);
point_range.start.column = 0;
point_range.end.row = cmp::min(
buffer.max_point().row,
point_range.end.row + CONTEXT_LINES,
point_range.end.row + (CONTEXT_LINES as u32),
);
point_range.end.column = buffer.line_len(point_range.end.row);
point_range
@@ -93,6 +141,16 @@ impl Tool for RegexSearchTool {
.peekable();
while let Some(mut range) = ranges.next() {
if matches_found >= RESULTS_PER_PAGE {
has_more_matches = true;
return Ok(());
}
if skips_remaining > 0 {
skips_remaining -= 1;
continue;
}
while let Some(next_range) = ranges.peek() {
if range.end.row >= next_range.start.row {
range.end = next_range.end;
@@ -102,18 +160,104 @@ impl Tool for RegexSearchTool {
}
}
writeln!(output, "```").unwrap();
output.extend(buffer.text_for_range(range));
writeln!(output, "\n```\n").unwrap();
if !file_header_written {
writeln!(output, "\n## Matches in {}", path.display())?;
file_header_written = true;
}
let mut processed_lines = std::collections::HashSet::<u32>::new();
// Process matches in two passes:
// 1. Long lines (>240 chars): Show only the matched line, with 120 chars of context around each match
// 2. Regular lines: Show the matched line plus context lines before/after
// First pass: handle long lines
for row in range.start.row..=range.end.row {
let row_u32 = row as u32;
let line_len = buffer.line_len(row_u32);
if (line_len as usize) > MAX_LINE_LENGTH {
let line_range = Point::new(row_u32, 0)..Point::new(row_u32, line_len);
let line_text = buffer.text_for_range(line_range).collect::<String>();
if matches_regex(line_text.clone(), &regex_str) {
if skips_remaining == 0 {
// Show each match in the long line with limited context
for (match_start, match_end) in find_matches(line_text.clone(), &regex_str) {
let start_char = match_start.saturating_sub(LONG_LINE_CONTEXT);
let end_char = (match_end + LONG_LINE_CONTEXT).min(line_len as usize);
writeln!(output, "\n# Line {}, chars {}-{}\n```", row_u32 + 1, start_char, end_char)?;
output.push_str(&line_text[start_char..end_char]);
output.push_str("\n```\n");
}
matches_found += 1;
} else {
skips_remaining -= 1;
}
processed_lines.insert(row_u32);
}
}
}
// Second pass: handle regular lines with context
let mut row = range.start.row;
while row <= range.end.row {
let row_u32 = row as u32;
if processed_lines.contains(&row_u32) {
row += 1;
continue;
}
let line_len = buffer.line_len(row_u32);
let line_range = Point::new(row_u32, 0)..Point::new(row_u32, line_len);
let line_text = buffer.text_for_range(line_range).collect::<String>();
if matches_regex(line_text.clone(), &regex_str) {
if skips_remaining > 0 {
skips_remaining -= 1;
row += 1;
continue;
}
// Show the match with context lines
let context_start = (row as usize).saturating_sub(CONTEXT_LINES) as u32;
let context_end = ((row as usize + CONTEXT_LINES) as u32).min(buffer.max_point().row);
let context_range = Point::new(context_start, 0)..Point::new(context_end, buffer.line_len(context_end));
writeln!(output, "\n### Lines {}-{}\n```", context_start + 1, context_end + 1)?;
output.push_str(&buffer.text_for_range(context_range).collect::<String>());
output.push_str("\n```\n");
// Mark all lines in this context range as processed
for r in context_start..=context_end {
processed_lines.insert(r);
}
matches_found += 1;
row = context_end + 1;
} else {
row += 1;
}
}
}
}
})?;
Ok(())
})??;
}
if output.is_empty() {
if matches_found == 0 {
Ok("No matches found".to_string())
} else if has_more_matches {
Ok(format!(
"Showing matches {}-{} (there were more matches found; use offset: {} to see next page):\n{output}",
offset + 1,
offset + matches_found,
offset + RESULTS_PER_PAGE,
))
} else {
Ok(output)
Ok(format!("Found {matches_found} matches:\n{output}"))
}
})
}
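
The regex tool's new long-line path prints only a window of roughly 120 characters on either side of each match instead of the full line. A standalone sketch of that windowing, assuming byte offsets snapped to UTF-8 boundaries (the `window_around_match` helper and sample line are illustrative, not from the commit):

```rust
const LONG_LINE_CONTEXT: usize = 120;

/// Returns a slice of `line` around the match at byte range [match_start, match_end),
/// padded with up to LONG_LINE_CONTEXT bytes on each side and snapped to char boundaries.
fn window_around_match(line: &str, match_start: usize, match_end: usize) -> &str {
    let mut start = match_start.saturating_sub(LONG_LINE_CONTEXT);
    let mut end = (match_end + LONG_LINE_CONTEXT).min(line.len());
    // Snap to valid UTF-8 boundaries so the slice cannot panic on multi-byte characters.
    while !line.is_char_boundary(start) {
        start -= 1;
    }
    while !line.is_char_boundary(end) {
        end -= 1;
    }
    &line[start..end]
}

fn main() {
    let long_line = format!("{}needle{}", "x".repeat(300), "y".repeat(300));
    let match_start = long_line.find("needle").unwrap();
    let window = window_around_match(&long_line, match_start, match_start + "needle".len());
    // 120 bytes of context on each side of the 6-byte match.
    assert_eq!(window.len(), 120 + "needle".len() + 120);
    println!("{window}");
}
```

The diff pairs this with a second pass that prints ordinary matched lines with two lines of surrounding context and records them in `processed_lines` so overlapping excerpts are not emitted twice.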

View File

@@ -1,3 +1,5 @@
Searches the entire project for the given regular expression.
Returns a list of paths that matched the query. For each path, it returns a list of excerpts of the matched text.
Results are paginated with 20 matches per page. Use the optional 'offset' parameter to request subsequent pages.
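
Unlike path search, the regex tool consumes matches as a stream of search results, so the page is carved out with a skip counter (`skips_remaining`), a running `matches_found` count, and a `has_more_matches` flag rather than a slice. A minimal sketch of that bookkeeping over a plain iterator (the `page_of_matches` helper and sample data are illustrative):

```rust
const RESULTS_PER_PAGE: usize = 20;

/// Walks a stream of matches, skipping `offset` of them, keeping the next
/// RESULTS_PER_PAGE, and reporting whether anything was left over.
fn page_of_matches<I: IntoIterator<Item = String>>(all_matches: I, offset: usize) -> (Vec<String>, bool) {
    let mut skips_remaining = offset;
    let mut page = Vec::new();
    let mut has_more_matches = false;
    for m in all_matches {
        if skips_remaining > 0 {
            skips_remaining -= 1;
            continue;
        }
        if page.len() >= RESULTS_PER_PAGE {
            has_more_matches = true;
            break;
        }
        page.push(m);
    }
    (page, has_more_matches)
}

fn main() {
    let matches = (1..=45).map(|i| format!("match {i}"));
    let (first, more) = page_of_matches(matches.clone(), 0);
    assert_eq!((first.len(), more), (20, true));
    let (third, more) = page_of_matches(matches, 40);
    // Last page holds matches 41-45, so no further pages remain.
    assert_eq!((third.len(), more), (5, false));
}
```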