Handle long lines in regex search

2025-03-18 11:20:04 -04:00
parent 2c2bce478d
commit c415009e4f
1 changed files with 123 additions and 21 deletions
--- a/crates/assistant_tools/src/regex_search.rs
+++ b/crates/assistant_tools/src/regex_search.rs
@@ -2,7 +2,38 @@ use anyhow::{anyhow, Result};
 use assistant_tool::{ActionLog, Tool};
 use futures::StreamExt;
 use gpui::{App, Entity, Task};
-use language::OffsetRangeExt;
+use language::{OffsetRangeExt, Point};
+
+/// Reports whether `text` contains `pattern` as a literal substring.
+///
+/// Despite the name, `pattern` is not compiled as a regex. An empty pattern never
+/// matches. `text` may be passed as a `String` or a `&str`.
+fn matches_regex(text: impl AsRef<str>, pattern: &str) -> bool {
+    !pattern.is_empty() && text.as_ref().contains(pattern)
+}
+
+/// Returns the byte range `(start, end)` of every occurrence of `pattern` in `text`,
+/// in ascending order. The pattern is matched as a literal substring (it is not
+/// compiled as a regex), overlapping occurrences are all reported, and an empty
+/// pattern yields no matches.
+fn find_matches(text: impl AsRef<str>, pattern: &str) -> Vec<(usize, usize)> {
+    let text = text.as_ref();
+    // Every match starts with the pattern's first character, so resuming the scan that
+    // many bytes past a match start always lands on a UTF-8 char boundary. Stepping by
+    // one byte instead would panic on `text[start..]` for multi-byte characters.
+    let step = match pattern.chars().next() {
+        Some(first) => first.len_utf8(),
+        None => return Vec::new(),
+    };
+    let mut matches = Vec::new();
+    let mut start = 0;
+    while let Some(pos) = text[start..].find(pattern) {
+        let match_start = start + pos;
+        matches.push((match_start, match_start + pattern.len()));
+        start = match_start + step;
+    }
+    matches
+}
 use language_model::LanguageModelRequestMessage;
 use project::{
    search::{SearchQuery, SearchResult},
@@ -26,6 +57,8 @@ pub struct RegexSearchToolInput {
 }

 const RESULTS_PER_PAGE: usize = 20;
+const MAX_LINE_LENGTH: usize = 240;
+const LONG_LINE_CONTEXT: usize = 120;

 pub struct RegexSearchTool;

@@ -51,15 +84,17 @@ impl Tool for RegexSearchTool {
        _action_log: Entity<ActionLog>,
        cx: &mut App,
    ) -> Task<Result<String>> {
-        const CONTEXT_LINES: u32 = 2;
+        const CONTEXT_LINES: usize = 2;

-        let (offset, regex) = match serde_json::from_value::<RegexSearchToolInput>(input) {
-            Ok(input) => (input.offset.unwrap_or(0), input.regex),
+        let input = match serde_json::from_value::<RegexSearchToolInput>(input) {
+            Ok(input) => input,
            Err(err) => return Task::ready(Err(anyhow!(err))),
        };
+        let offset = input.offset.unwrap_or(0);
+        let regex_str = input.regex;

        let query = match SearchQuery::regex(
-            &regex,
+            &regex_str,
            false,
            false,
            false,
@@ -93,12 +128,12 @@ impl Tool for RegexSearchTool {
                            .into_iter()
                            .map(|range| {
                                let mut point_range = range.to_point(buffer);
-                                point_range.start.row =
-                                    point_range.start.row.saturating_sub(CONTEXT_LINES);
+                                let context_lines_u32 = CONTEXT_LINES as u32;
+                                point_range.start.row = point_range.start.row.saturating_sub(context_lines_u32);
                                point_range.start.column = 0;
                                point_range.end.row = cmp::min(
                                    buffer.max_point().row,
-                                    point_range.end.row + CONTEXT_LINES,
+                                    point_range.end.row + (CONTEXT_LINES as u32),
                                );
                                point_range.end.column = buffer.line_len(point_range.end.row);
                                point_range
@@ -106,17 +141,16 @@ impl Tool for RegexSearchTool {
                            .peekable();

                        while let Some(mut range) = ranges.next() {
-                            if skips_remaining > 0 {
-                                skips_remaining -= 1;
-                                continue;
-                            }
-
-                            // We'd already found a full page of matches, and we just found one more.
                            if matches_found >= RESULTS_PER_PAGE {
                                has_more_matches = true;
                                return Ok(());
                            }

+                            if skips_remaining > 0 {
+                                skips_remaining -= 1;
+                                continue;
+                            }
+
                            while let Some(next_range) = ranges.peek() {
                                if range.end.row >= next_range.start.row {
                                    range.end = next_range.end;
@@ -131,13 +165,81 @@ impl Tool for RegexSearchTool {
                                file_header_written = true;
                            }

-                            let start_line = range.start.row + 1;
-                            let end_line = range.end.row + 1;
-                            writeln!(output, "\n### Lines {start_line}-{end_line}\n```")?;
-                            output.extend(buffer.text_for_range(range));
-                            output.push_str("\n```\n");
+                            let mut processed_lines = std::collections::HashSet::<u32>::new();
+
+                            // Process matches in two passes:
+                            // 1. Long lines (>240 chars): Show only the matched line, with 120 chars of context around each match
+                            // 2. Regular lines: Show the matched line plus context lines before/after
+
+                            // First pass: handle long lines
+                            for row in range.start.row..=range.end.row {
+                                let row_u32 = row as u32;
+                                let line_len = buffer.line_len(row_u32);
+                                if (line_len as usize) > MAX_LINE_LENGTH {
+                                    let line_range = Point::new(row_u32, 0)..Point::new(row_u32, line_len);
+                                    let line_text = buffer.text_for_range(line_range).collect::<String>();
+
+                                    if matches_regex(line_text.clone(), &regex_str) {
+                                        if skips_remaining == 0 {
+                                            // Show each match in the long line with limited context
+                                            for (match_start, match_end) in find_matches(line_text.clone(), &regex_str) {
+                                                let start_char = match_start.saturating_sub(LONG_LINE_CONTEXT);
+                                                let end_char = (match_end + LONG_LINE_CONTEXT).min(line_len as usize);
+                                                writeln!(output, "\n# Line {}, chars {}-{}\n```", row_u32 + 1, start_char, end_char)?;
+                                                output.push_str(&line_text[start_char..end_char]);
+                                                output.push_str("\n```\n");
+                                            }
+                                            matches_found += 1;
+                                        } else {
+                                            skips_remaining -= 1;
+                                        }
+
+                                        processed_lines.insert(row_u32);
+                                    }
+                                }
+                            }
+
+                            // Second pass: handle regular lines with context
+                            let mut row = range.start.row;
+                            while row <= range.end.row {
+                                let row_u32 = row as u32;
+                                if processed_lines.contains(&row_u32) {
+                                    row += 1;
+                                    continue;
+                                }
+
+                                let line_len = buffer.line_len(row_u32);
+                                let line_range = Point::new(row_u32, 0)..Point::new(row_u32, line_len);
+                                let line_text = buffer.text_for_range(line_range).collect::<String>();
+
+                                if matches_regex(line_text.clone(), &regex_str) {
+                                    if skips_remaining > 0 {
+                                        skips_remaining -= 1;
+                                        row += 1;
+                                        continue;
+                                    }
+
+                                    // Show the match with context lines
+                                    let context_start = (row as usize).saturating_sub(CONTEXT_LINES) as u32;
+                                    let context_end = ((row as usize + CONTEXT_LINES) as u32).min(buffer.max_point().row);
+                                    let context_range = Point::new(context_start, 0)..Point::new(context_end, buffer.line_len(context_end));
+
+                                    writeln!(output, "\n### Lines {}-{}\n```", context_start + 1, context_end + 1)?;
+                                    output.push_str(&buffer.text_for_range(context_range).collect::<String>());
+                                    output.push_str("\n```\n");
+
+                                    // Mark all lines in this context range as processed
+                                    for r in context_start..=context_end {
+                                        processed_lines.insert(r);
+                                    }
+
+                                    matches_found += 1;
+                                    row = context_end + 1;
+                                } else {
+                                    row += 1;
+                                }
+                            }

-                            matches_found += 1;
                        }
                    }

@@ -154,7 +256,7 @@ impl Tool for RegexSearchTool {
                    offset + matches_found,
                    offset + RESULTS_PER_PAGE,
                ))
-          } else {
+            } else {
                Ok(format!("Found {matches_found} matches:\n{output}"))
            }
        })