Compare commits

...

5 Commits

Author SHA1 Message Date
Richard Feldman
4d74b23ce8 wip address long lines in regex search 2025-03-19 10:44:44 -04:00
Richard Feldman
c248b2f26f wip 2025-03-18 12:15:52 -04:00
Richard Feldman
3b2258b9e3 Move find_matches down 2025-03-18 11:58:33 -04:00
Richard Feldman
d771c275f5 Get rid of unnecessary matches_regex function 2025-03-18 11:56:21 -04:00
Richard Feldman
618d0cbf59 Add long line handling 2025-03-18 11:51:23 -04:00

View File

@@ -2,7 +2,7 @@ use anyhow::{anyhow, Result};
use assistant_tool::{ActionLog, Tool};
use futures::StreamExt;
use gpui::{App, Entity, Task};
use language::OffsetRangeExt;
use language::{OffsetRangeExt, Point};
use language_model::LanguageModelRequestMessage;
use project::{
search::{SearchQuery, SearchResult},
@@ -10,7 +10,7 @@ use project::{
};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use std::{cmp, fmt::Write, sync::Arc};
use std::{cmp, fmt::Write, ops::Range, sync::Arc};
use util::paths::PathMatcher;
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
@@ -26,6 +26,8 @@ pub struct RegexSearchToolInput {
}
const RESULTS_PER_PAGE: usize = 20;
const MAX_LINE_LENGTH: u32 = 240;
const LONG_LINE_CONTEXT: usize = 120;
pub struct RegexSearchTool;
@@ -58,6 +60,10 @@ impl Tool for RegexSearchTool {
Err(err) => return Task::ready(Err(anyhow!(err))),
};
if regex.is_empty() {
return Task::ready(Err(anyhow!("Empty regex pattern is not allowed")));
};
let query = match SearchQuery::regex(
&regex,
false,
@@ -73,6 +79,11 @@ impl Tool for RegexSearchTool {
let results = project.update(cx, |project, cx| project.search(query, cx));
enum MatchRange {
Lines(Range<Point>),
LongLine(Range<Point>),
}
cx.spawn(|cx| async move {
futures::pin_mut!(results);
@@ -91,58 +102,80 @@ impl Tool for RegexSearchTool {
let mut file_header_written = false;
let mut ranges = ranges
.into_iter()
.map(|range| {
.map(|range| -> MatchRange {
let mut point_range = range.to_point(buffer);
point_range.start.row =
point_range.start.row.saturating_sub(CONTEXT_LINES);
point_range.start.column = 0;
point_range.end.row = cmp::min(
buffer.max_point().row,
point_range.end.row + CONTEXT_LINES,
);
point_range.end.column = buffer.line_len(point_range.end.row);
point_range
let is_long_line = buffer.line_len(point_range.start.row) > MAX_LINE_LENGTH;
if is_long_line {
let line_range = Point::new(point_range.start.row, 0)..Point::new(point_range.start.row, buffer.line_len(point_range.start.row));
let line_text = buffer.text_for_range(line_range).collect::<String>();
for (match_start, match_end) in find_matches(line_text.clone(), &regex) {
let start_char = match_start.saturating_sub(LONG_LINE_CONTEXT);
let end_char = (match_end + LONG_LINE_CONTEXT).min(buffer.line_len(point_range.start.row) as usize);
writeln!(output, "\n### Line {}, chars {}-{}\n```", point_range.start.row + 1, start_char, end_char)?;
output.push_str(&line_text[start_char..end_char]);
output.push_str("\n```\n");
}
matches_found += 1;
if matches_found >= RESULTS_PER_PAGE {
has_more_matches = true;
}
Ok((Point::new(point_range.start.row + 1, 0)..Point::new(point_range.start.row + 1, 0), true))
} else {
point_range.start.row = point_range.start.row.saturating_sub(CONTEXT_LINES);
point_range.start.column = 0;
point_range.end.row = cmp::min(
buffer.max_point().row,
point_range.end.row + CONTEXT_LINES,
);
point_range.end.column = buffer.line_len(point_range.end.row);
Ok((point_range, is_long_line))
}
})
.peekable();
while let Some(mut range) = ranges.next() {
while let Some(range_result) = ranges.next() {
let (range, is_long) = range_result?;
// Skip long lines as they were already handled
if is_long {
continue;
}
if skips_remaining > 0 {
skips_remaining -= 1;
continue;
}
// We'd already found a full page of matches, and we just found one more.
// We've found a full page of matches
if matches_found >= RESULTS_PER_PAGE {
has_more_matches = true;
return Ok(());
}
while let Some(next_range) = ranges.peek() {
if range.end.row >= next_range.start.row {
range.end = next_range.end;
ranges.next();
} else {
break;
}
break;
}
// Write file header if needed
if !file_header_written {
writeln!(output, "\n## Matches in {}", path.display())?;
let _ = writeln!(output, "\n## Matches in {}", path.display());
file_header_written = true;
}
let start_line = range.start.row + 1;
let end_line = range.end.row + 1;
writeln!(output, "\n### Lines {start_line}-{end_line}\n```")?;
output.extend(buffer.text_for_range(range));
output.push_str("\n```\n");
// Show the match with context lines
let context_start = range.start.row;
let context_end = range.end.row;
let context_range = Point::new(context_start, 0)..Point::new(context_end, buffer.line_len(context_end));
let context_text = buffer.text_for_range(context_range).collect::<String>();
matches_found += 1;
if context_text.contains(&regex) {
let _ = writeln!(output, "\n### Lines {}-{}\n```", context_start + 1, context_end + 1);
output.push_str(&context_text);
output.push_str("\n```\n");
matches_found += 1;
}
}
}
Ok(())
})??;
})?;
}
if matches_found == 0 {
@@ -154,9 +187,236 @@ impl Tool for RegexSearchTool {
offset + matches_found,
offset + RESULTS_PER_PAGE,
))
} else {
} else {
Ok(format!("Found {matches_found} matches:\n{output}"))
}
})
}
}
fn find_matches(text: String, pattern: &str) -> Vec<(usize, usize)> {
let mut matches = Vec::new();
if pattern.is_empty() {
return matches;
}
let mut start = 0;
while start < text.len() {
match text[start..].find(pattern) {
Some(pos) => {
let match_start = start + pos;
let match_end = match_start + pattern.len();
if match_end <= text.len() {
matches.push((match_start, match_end));
}
start = match_start + 1;
}
None => break,
}
}
matches
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_find_matches() {
// Test basic match finding
let text = "hello world hello".to_string();
let matches = find_matches(text, "hello");
assert_eq!(matches.len(), 2);
assert_eq!(matches[0], (0, 5));
assert_eq!(matches[1], (12, 17));
// Test overlapping matches
let overlaps = find_matches("abababa".to_string(), "aba");
assert_eq!(overlaps.len(), 3);
assert_eq!(overlaps[0], (0, 3));
assert_eq!(overlaps[1], (2, 5));
assert_eq!(overlaps[2], (4, 7));
// Test edge cases
assert_eq!(find_matches("".to_string(), "pattern"), vec![]);
assert_eq!(find_matches("text".to_string(), ""), vec![]);
}
#[test]
fn test_long_line_context_calculation() {
// Create a line that exceeds MAX_LINE_LENGTH
let prefix = "x".repeat(100);
let target = "TARGET";
let suffix = "y".repeat(300);
let long_line = format!("{}{}{}", prefix, target, suffix);
// Find the target in the long line
let matches = find_matches(long_line.clone(), target);
assert_eq!(matches.len(), 1);
let (match_start, match_end) = matches[0];
// Verify context calculation
let start_char = match_start.saturating_sub(LONG_LINE_CONTEXT);
let end_char = (match_end + LONG_LINE_CONTEXT).min(long_line.len());
// Context should start no more than LONG_LINE_CONTEXT chars before match
assert!(match_start - start_char <= LONG_LINE_CONTEXT);
// Context should end no more than LONG_LINE_CONTEXT chars after match
assert!(end_char - match_end <= LONG_LINE_CONTEXT);
// Context should contain the target
let context = &long_line[start_char..end_char];
assert!(context.contains(target));
}
#[test]
fn test_line_length_classification() {
// Test if lines are correctly classified as long or regular
let max_len = MAX_LINE_LENGTH as usize;
// Line shorter than MAX_LINE_LENGTH
let regular_line = "x".repeat(max_len - 1);
assert!(regular_line.len() < max_len);
// Line exactly at MAX_LINE_LENGTH
let boundary_line = "x".repeat(max_len);
assert_eq!(boundary_line.len(), max_len);
// Line longer than MAX_LINE_LENGTH
let long_line = "x".repeat(max_len + 1);
assert!(long_line.len() > max_len);
}
#[test]
fn test_heading_format() {
// For long lines: "### Line X, chars Y-Z"
// For regular lines: "### Lines X-Y"
let long_line_row = 42_usize;
let start_char = 100_usize;
let end_char = 340_usize;
// In the implementation, the heading format for long lines is "### Line"
let long_line_heading = format!(
"### Line {}, chars {}-{}",
long_line_row + 1,
start_char,
end_char
);
assert_eq!(long_line_heading, "### Line 43, chars 100-340");
let context_start = 40_usize;
let context_end = 44_usize;
let regular_heading = format!("### Lines {}-{}", context_start + 1, context_end + 1);
assert_eq!(regular_heading, "### Lines 41-45");
}
#[test]
fn test_pagination() {
// Initialize variables to simulate pagination
let mut skips_remaining = 2;
let mut matches_found = 0;
// Simulate processing 5 matches with offset 2
for _ in 0..5 {
if skips_remaining > 0 {
skips_remaining -= 1;
continue;
}
matches_found += 1;
}
// Should have processed 3 matches after skipping 2
assert_eq!(matches_found, 3);
assert_eq!(skips_remaining, 0);
// Test page limits
let mut matches_found = 0;
let mut has_more_matches = false;
// Simulate processing more matches than fit in a page
for _ in 0..(RESULTS_PER_PAGE + 5) {
// We'd already found a full page of matches, and we just found one more.
if matches_found >= RESULTS_PER_PAGE {
has_more_matches = true;
break;
}
matches_found += 1;
}
// Should have stopped at RESULTS_PER_PAGE
assert_eq!(matches_found, RESULTS_PER_PAGE);
assert!(has_more_matches);
}
#[test]
fn test_handling_of_very_long_lines() {
// Test very long lines with matches at different positions
// Create a very long line (3 * MAX_LINE_LENGTH)
let max_len = MAX_LINE_LENGTH as usize;
let long_prefix = "prefix_".repeat(max_len / 7);
let middle = "middle_".repeat(max_len / 7);
let long_suffix = "suffix_".repeat(max_len / 7);
let very_long_line = format!("{}{}{}", long_prefix, middle, long_suffix);
assert!(very_long_line.len() > max_len);
// Find matches for "middle" in the very long line
let matches = find_matches(very_long_line.clone(), "middle_");
assert!(!matches.is_empty());
// First match should be after the prefix
let (first_match_start, _) = matches[0];
assert!(first_match_start >= long_prefix.len());
// With 120 chars of context, we should not see the start of the string
let context_start = first_match_start.saturating_sub(LONG_LINE_CONTEXT);
assert!(context_start > 0); // Context should not include the very beginning
// But context should include the match
let context_end = first_match_start + 7 + LONG_LINE_CONTEXT; // "middle_" is 7 chars
let context = &very_long_line[context_start..context_end.min(very_long_line.len())];
assert!(context.contains("middle_"));
}
#[test]
fn test_output_format() {
// Test format consistency
// For a long line match (> 240 chars), we show:
// - Only the matched line (no context lines)
// - Format: "### Line X, chars Y-Z"
// - Context: 120 chars before and after the match
// For regular lines (< 240 chars), we show:
// - The matched line plus context (2 lines before, 2 lines after)
// - Format: "### Lines X-Y"
// Test that multiple matches in a long line are shown separately
let row = 42_usize;
let match1_start = 100_usize;
let match1_end = 110_usize;
let match2_start = 300_usize;
let match2_end = 310_usize;
let heading1 = format!(
"### Line {}, chars {}-{}",
row + 1,
match1_start.saturating_sub(LONG_LINE_CONTEXT),
(match1_end + LONG_LINE_CONTEXT)
);
let heading2 = format!(
"### Line {}, chars {}-{}",
row + 1,
match2_start.saturating_sub(LONG_LINE_CONTEXT),
(match2_end + LONG_LINE_CONTEXT)
);
// Each match should have its own heading
assert_ne!(heading1, heading2);
}
}