Compare commits

...

6 Commits

Author | SHA1 | Message | Date
Richard Feldman | c415009e4f | Handle long lines in regex search | 2025-03-18 11:21:36 -04:00
Richard Feldman | 2c2bce478d | Fix offset behavior in regex search | 2025-03-18 11:21:36 -04:00
Richard Feldman | e26e4ee693 | Use 20 results per page on regex search | 2025-03-18 11:21:36 -04:00
Richard Feldman | 8144c938d3 | Change read file tool to use line numbers too | 2025-03-18 11:21:36 -04:00
Richard Feldman | e53221f1f9 | Show line numbers in regex tool results | 2025-03-18 11:21:36 -04:00
Richard Feldman | 38f1d14876 | Paginate path search | 2025-03-18 11:21:36 -04:00
5 changed files with 206 additions and 29 deletions

View File

@@ -23,8 +23,15 @@ pub struct PathSearchToolInput {
/// You can get back the first two paths by providing a glob of "*thing*.txt"
/// </example>
pub glob: String,
/// Optional starting position for paginated results (0-based).
/// When not provided, starts from the beginning.
#[serde(default)]
pub offset: Option<usize>,
}
const RESULTS_PER_PAGE: usize = 50;
pub struct PathSearchTool;
impl Tool for PathSearchTool {
@@ -49,8 +56,8 @@ impl Tool for PathSearchTool {
_action_log: Entity<ActionLog>,
cx: &mut App,
) -> Task<Result<String>> {
let glob = match serde_json::from_value::<PathSearchToolInput>(input) {
Ok(input) => input.glob,
let (offset, glob) = match serde_json::from_value::<PathSearchToolInput>(input) {
Ok(input) => (input.offset.unwrap_or(0), input.glob),
Err(err) => return Task::ready(Err(anyhow!(err))),
};
let path_matcher = match PathMatcher::new(&[glob.clone()]) {
@@ -87,7 +94,27 @@ impl Tool for PathSearchTool {
} else {
// Sort to group entries in the same directory together.
matches.sort();
Ok(matches.join("\n"))
let total_matches = matches.len();
let response = if total_matches > offset + RESULTS_PER_PAGE {
let paginated_matches: Vec<_> = matches
.into_iter()
.skip(offset)
.take(RESULTS_PER_PAGE)
.collect();
format!(
"Found {} total matches. Showing results {}-{} (provide 'offset' parameter for more results):\n\n{}",
total_matches,
offset + 1,
offset + paginated_matches.len(),
paginated_matches.join("\n")
)
} else {
matches.join("\n")
};
Ok(response)
}
})
}
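
The path search change collects all matches, sorts them, then slices out one page of 50 with skip/take, adding the "Found N total matches" header only when more results remain past the current page. A minimal standalone sketch of that slicing (the `paginate` helper and the sample paths are illustrative, not part of the commit):

```rust
const RESULTS_PER_PAGE: usize = 50;

/// Returns the page of matches starting at `offset`, plus whether more pages remain.
fn paginate(matches: Vec<String>, offset: usize) -> (Vec<String>, bool) {
    let total = matches.len();
    let page: Vec<String> = matches
        .into_iter()
        .skip(offset)
        .take(RESULTS_PER_PAGE)
        .collect();
    (page, total > offset + RESULTS_PER_PAGE)
}

fn main() {
    let matches: Vec<String> = (0..120).map(|i| format!("src/file_{i}.rs")).collect();
    let (page, has_more) = paginate(matches, 50);
    // Second page: results 51-100 of 120, with one more page remaining.
    assert_eq!(page.len(), 50);
    assert!(has_more);
    println!("showing {} results, more: {}", page.len(), has_more);
}
```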

View File

@@ -1 +1,3 @@
Returns all the paths in the project which match the given glob.
Returns paths in the project which match the given glob.
Results are paginated with 50 matches per page. Use the optional 'offset' parameter to request subsequent pages.
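
For callers, `offset` is optional and omitting it starts from the first match. A hedged sketch of how the input JSON deserializes, using a trimmed stand-in for `PathSearchToolInput` with the same field attributes as the diff:

```rust
use serde::Deserialize;

// Trimmed stand-in for the tool's input struct; field names match the diff.
#[derive(Deserialize)]
struct PathSearchToolInput {
    glob: String,
    #[serde(default)]
    offset: Option<usize>,
}

fn main() -> Result<(), serde_json::Error> {
    // First page: no offset provided, so the tool starts from the beginning.
    let first: PathSearchToolInput = serde_json::from_str(r#"{ "glob": "*thing*.txt" }"#)?;
    assert_eq!(first.offset.unwrap_or(0), 0);

    // Next page: ask for results starting at match 50.
    let next: PathSearchToolInput =
        serde_json::from_str(r#"{ "glob": "*thing*.txt", "offset": 50 }"#)?;
    assert_eq!(next.offset, Some(50));
    println!("glob: {}", next.glob);
    Ok(())
}
```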

View File

@@ -28,13 +28,13 @@ pub struct ReadFileToolInput {
/// </example>
pub path: Arc<Path>,
/// Optional line number to start reading from (0-based index)
/// Optional line number to start reading on (1-based index)
#[serde(default)]
pub start_line: Option<usize>,
/// Optional number of lines to read
/// Optional line number to end reading on (1-based index)
#[serde(default)]
pub line_count: Option<usize>,
pub end_line: Option<usize>,
}
pub struct ReadFileTool;
@@ -83,10 +83,12 @@ impl Tool for ReadFileTool {
.map_or(false, |file| file.disk_state().exists())
{
let text = buffer.text();
let string = if input.start_line.is_some() || input.line_count.is_some() {
let lines = text.split('\n').skip(input.start_line.unwrap_or(0));
if let Some(line_count) = input.line_count {
Itertools::intersperse(lines.take(line_count), "\n").collect()
let string = if input.start_line.is_some() || input.end_line.is_some() {
let start = input.start_line.unwrap_or(1);
let lines = text.split('\n').skip(start - 1);
if let Some(end) = input.end_line {
let count = end.saturating_sub(start);
Itertools::intersperse(lines.take(count), "\n").collect()
} else {
Itertools::intersperse(lines, "\n").collect()
}
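
The read tool now takes 1-based `start_line` and `end_line` instead of a 0-based start plus a line count. A minimal standalone sketch of slicing text by such a range, assuming `end_line` is inclusive, hence the `+ 1` in the take count (the `read_lines` helper and sample text are illustrative, not from the commit):

```rust
use itertools::Itertools;

/// Returns the 1-based, inclusive range of lines [start_line, end_line] from `text`.
/// With no end line, reads through to the end of the text.
fn read_lines(text: &str, start_line: Option<usize>, end_line: Option<usize>) -> String {
    let start = start_line.unwrap_or(1).max(1);
    let lines = text.split('\n').skip(start - 1);
    match end_line {
        // Inclusive end line under this sketch's assumption, hence the + 1.
        Some(end) => {
            Itertools::intersperse(lines.take(end.saturating_sub(start) + 1), "\n").collect()
        }
        None => Itertools::intersperse(lines, "\n").collect(),
    }
}

fn main() {
    let text = "line 1\nline 2\nline 3\nline 4";
    assert_eq!(read_lines(text, Some(2), Some(3)), "line 2\nline 3");
    assert_eq!(read_lines(text, Some(3), None), "line 3\nline 4");
}
```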

View File

@@ -2,9 +2,43 @@ use anyhow::{anyhow, Result};
use assistant_tool::{ActionLog, Tool};
use futures::StreamExt;
use gpui::{App, Entity, Task};
use language::OffsetRangeExt;
use language::{OffsetRangeExt, Point};
fn matches_regex(text: String, pattern: &str) -> bool {
// Safely check if pattern exists in text
if pattern.is_empty() {
return false;
}
text.contains(pattern)
}
fn find_matches(text: String, pattern: &str) -> Vec<(usize, usize)> {
let mut matches = Vec::new();
if pattern.is_empty() {
return matches;
}
let mut start = 0;
while start < text.len() {
match text[start..].find(pattern) {
Some(pos) => {
let match_start = start + pos;
let match_end = match_start + pattern.len();
if match_end <= text.len() {
matches.push((match_start, match_end));
}
start = match_start + 1;
}
None => break,
}
}
matches
}
use language_model::LanguageModelRequestMessage;
use project::{search::SearchQuery, Project};
use project::{
search::{SearchQuery, SearchResult},
Project,
};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use std::{cmp, fmt::Write, sync::Arc};
@@ -15,8 +49,17 @@ pub struct RegexSearchToolInput {
/// A regex pattern to search for in the entire project. Note that the regex
/// will be parsed by the Rust `regex` crate.
pub regex: String,
/// Optional starting position for paginated results (0-based).
/// When not provided, starts from the beginning.
#[serde(default)]
pub offset: Option<usize>,
}
const RESULTS_PER_PAGE: usize = 20;
const MAX_LINE_LENGTH: usize = 240;
const LONG_LINE_CONTEXT: usize = 120;
pub struct RegexSearchTool;
impl Tool for RegexSearchTool {
@@ -41,15 +84,17 @@ impl Tool for RegexSearchTool {
_action_log: Entity<ActionLog>,
cx: &mut App,
) -> Task<Result<String>> {
const CONTEXT_LINES: u32 = 2;
const CONTEXT_LINES: usize = 2;
let input = match serde_json::from_value::<RegexSearchToolInput>(input) {
Ok(input) => input,
Err(err) => return Task::ready(Err(anyhow!(err))),
};
let offset = input.offset.unwrap_or(0);
let regex_str = input.regex;
let query = match SearchQuery::regex(
&input.regex,
&regex_str,
false,
false,
false,
@@ -62,30 +107,33 @@ impl Tool for RegexSearchTool {
};
let results = project.update(cx, |project, cx| project.search(query, cx));
cx.spawn(|cx| async move {
futures::pin_mut!(results);
let mut output = String::new();
while let Some(project::search::SearchResult::Buffer { buffer, ranges }) =
results.next().await
{
let mut skips_remaining = offset;
let mut matches_found = 0;
let mut has_more_matches = false;
while let Some(SearchResult::Buffer { buffer, ranges }) = results.next().await {
if ranges.is_empty() {
continue;
}
buffer.read_with(&cx, |buffer, cx| {
buffer.read_with(&cx, |buffer, cx| -> Result<(), anyhow::Error> {
if let Some(path) = buffer.file().map(|file| file.full_path(cx)) {
writeln!(output, "### Found matches in {}:\n", path.display()).unwrap();
let mut file_header_written = false;
let mut ranges = ranges
.into_iter()
.map(|range| {
let mut point_range = range.to_point(buffer);
point_range.start.row =
point_range.start.row.saturating_sub(CONTEXT_LINES);
let context_lines_u32 = CONTEXT_LINES as u32;
point_range.start.row = point_range.start.row.saturating_sub(context_lines_u32);
point_range.start.column = 0;
point_range.end.row = cmp::min(
buffer.max_point().row,
point_range.end.row + CONTEXT_LINES,
point_range.end.row + (CONTEXT_LINES as u32),
);
point_range.end.column = buffer.line_len(point_range.end.row);
point_range
@@ -93,6 +141,16 @@ impl Tool for RegexSearchTool {
.peekable();
while let Some(mut range) = ranges.next() {
if matches_found >= RESULTS_PER_PAGE {
has_more_matches = true;
return Ok(());
}
if skips_remaining > 0 {
skips_remaining -= 1;
continue;
}
while let Some(next_range) = ranges.peek() {
if range.end.row >= next_range.start.row {
range.end = next_range.end;
@@ -102,18 +160,104 @@ impl Tool for RegexSearchTool {
}
}
writeln!(output, "```").unwrap();
output.extend(buffer.text_for_range(range));
writeln!(output, "\n```\n").unwrap();
if !file_header_written {
writeln!(output, "\n## Matches in {}", path.display())?;
file_header_written = true;
}
let mut processed_lines = std::collections::HashSet::<u32>::new();
// Process matches in two passes:
// 1. Long lines (>240 chars): Show only the matched line, with 120 chars of context around each match
// 2. Regular lines: Show the matched line plus context lines before/after
// First pass: handle long lines
for row in range.start.row..=range.end.row {
let row_u32 = row as u32;
let line_len = buffer.line_len(row_u32);
if (line_len as usize) > MAX_LINE_LENGTH {
let line_range = Point::new(row_u32, 0)..Point::new(row_u32, line_len);
let line_text = buffer.text_for_range(line_range).collect::<String>();
if matches_regex(line_text.clone(), &regex_str) {
if skips_remaining == 0 {
// Show each match in the long line with limited context
for (match_start, match_end) in find_matches(line_text.clone(), &regex_str) {
let start_char = match_start.saturating_sub(LONG_LINE_CONTEXT);
let end_char = (match_end + LONG_LINE_CONTEXT).min(line_len as usize);
writeln!(output, "\n# Line {}, chars {}-{}\n```", row_u32 + 1, start_char, end_char)?;
output.push_str(&line_text[start_char..end_char]);
output.push_str("\n```\n");
}
matches_found += 1;
} else {
skips_remaining -= 1;
}
processed_lines.insert(row_u32);
}
}
}
// Second pass: handle regular lines with context
let mut row = range.start.row;
while row <= range.end.row {
let row_u32 = row as u32;
if processed_lines.contains(&row_u32) {
row += 1;
continue;
}
let line_len = buffer.line_len(row_u32);
let line_range = Point::new(row_u32, 0)..Point::new(row_u32, line_len);
let line_text = buffer.text_for_range(line_range).collect::<String>();
if matches_regex(line_text.clone(), &regex_str) {
if skips_remaining > 0 {
skips_remaining -= 1;
row += 1;
continue;
}
// Show the match with context lines
let context_start = (row as usize).saturating_sub(CONTEXT_LINES) as u32;
let context_end = ((row as usize + CONTEXT_LINES) as u32).min(buffer.max_point().row);
let context_range = Point::new(context_start, 0)..Point::new(context_end, buffer.line_len(context_end));
writeln!(output, "\n### Lines {}-{}\n```", context_start + 1, context_end + 1)?;
output.push_str(&buffer.text_for_range(context_range).collect::<String>());
output.push_str("\n```\n");
// Mark all lines in this context range as processed
for r in context_start..=context_end {
processed_lines.insert(r);
}
matches_found += 1;
row = context_end + 1;
} else {
row += 1;
}
}
}
}
})?;
Ok(())
})??;
}
if output.is_empty() {
if matches_found == 0 {
Ok("No matches found".to_string())
} else if has_more_matches {
Ok(format!(
"Showing matches {}-{} (there were more matches found; use offset: {} to see next page):\n{output}",
offset + 1,
offset + matches_found,
offset + RESULTS_PER_PAGE,
))
} else {
Ok(output)
Ok(format!("Found {matches_found} matches:\n{output}"))
}
})
}
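
The regex tool's new long-line path prints only a window of roughly 120 characters on either side of each match instead of the full line. A standalone sketch of that windowing, assuming byte offsets snapped to UTF-8 boundaries (the `window_around_match` helper and sample line are illustrative, not from the commit):

```rust
const LONG_LINE_CONTEXT: usize = 120;

/// Returns a slice of `line` around the match at byte range [match_start, match_end),
/// padded with up to LONG_LINE_CONTEXT bytes on each side and snapped to char boundaries.
fn window_around_match(line: &str, match_start: usize, match_end: usize) -> &str {
    let mut start = match_start.saturating_sub(LONG_LINE_CONTEXT);
    let mut end = (match_end + LONG_LINE_CONTEXT).min(line.len());
    // Snap to valid UTF-8 boundaries so the slice cannot panic on multi-byte characters.
    while !line.is_char_boundary(start) {
        start -= 1;
    }
    while !line.is_char_boundary(end) {
        end -= 1;
    }
    &line[start..end]
}

fn main() {
    let long_line = format!("{}needle{}", "x".repeat(300), "y".repeat(300));
    let match_start = long_line.find("needle").unwrap();
    let window = window_around_match(&long_line, match_start, match_start + "needle".len());
    // 120 bytes of context on each side of the 6-byte match.
    assert_eq!(window.len(), 120 + "needle".len() + 120);
    println!("{window}");
}
```

The diff pairs this with a second pass that prints ordinary matched lines with two lines of surrounding context and records them in `processed_lines` so overlapping excerpts are not emitted twice.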

View File

@@ -1,3 +1,5 @@
Searches the entire project for the given regular expression.
Returns a list of paths that matched the query. For each path, it returns a list of excerpts of the matched text.
Results are paginated with 20 matches per page. Use the optional 'offset' parameter to request subsequent pages.
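
Unlike path search, the regex tool consumes matches as a stream of search results, so the page is carved out with a skip counter (`skips_remaining`), a running `matches_found` count, and a `has_more_matches` flag rather than a slice. A minimal sketch of that bookkeeping over a plain iterator (the `page_of_matches` helper and sample data are illustrative):

```rust
const RESULTS_PER_PAGE: usize = 20;

/// Walks a stream of matches, skipping `offset` of them, keeping the next
/// RESULTS_PER_PAGE, and reporting whether anything was left over.
fn page_of_matches<I: IntoIterator<Item = String>>(all_matches: I, offset: usize) -> (Vec<String>, bool) {
    let mut skips_remaining = offset;
    let mut page = Vec::new();
    let mut has_more_matches = false;
    for m in all_matches {
        if skips_remaining > 0 {
            skips_remaining -= 1;
            continue;
        }
        if page.len() >= RESULTS_PER_PAGE {
            has_more_matches = true;
            break;
        }
        page.push(m);
    }
    (page, has_more_matches)
}

fn main() {
    let matches = (1..=45).map(|i| format!("match {i}"));
    let (first, more) = page_of_matches(matches.clone(), 0);
    assert_eq!((first.len(), more), (20, true));
    let (third, more) = page_of_matches(matches, 40);
    // Last page holds matches 41-45, so no further pages remain.
    assert_eq!((third.len(), more), (5, false));
}
```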