Compare commits

...

5 Commits

Author SHA1 Message Date
Will Bradley
1ea0c9634c Merge remote-tracking branch 'origin/main' into look-behind-regex 2024-11-06 09:26:03 -07:00
Will Bradley
4bbdf18280 explicating this idea... 2024-11-05 22:17:53 -07:00
Will Bradley
83b191cb17 wip 2024-11-05 19:03:29 -07:00
Will Bradley
6cde94d0bc initial idea to start 2024-11-05 19:01:23 -07:00
Will Bradley
6823ac624a fix a typo in ansible.md 2024-11-05 16:30:06 -07:00
4 changed files with 148 additions and 108 deletions

29
Cargo.lock generated
View File

@@ -1636,6 +1636,15 @@ dependencies = [
"bit-vec 0.7.0",
]
[[package]]
name = "bit-set"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3"
dependencies = [
"bit-vec 0.8.0",
]
[[package]]
name = "bit-vec"
version = "0.6.3"
@@ -1648,6 +1657,12 @@ version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2c54ff287cfc0a34f38a6b832ea1bd8e448a330b3e40a50859e6488bee07f22"
[[package]]
name = "bit-vec"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7"
[[package]]
name = "bit_field"
version = "0.10.2"
@@ -4225,6 +4240,17 @@ dependencies = [
"regex",
]
[[package]]
name = "fancy-regex"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e24cb5a94bcae1e5408b0effca5cd7172ea3c5755049c5f3af4cd283a165298"
dependencies = [
"bit-set 0.8.0",
"regex-automata 0.4.7",
"regex-syntax 0.8.4",
]
[[package]]
name = "fast-srgb8"
version = "1.0.0"
@@ -8869,6 +8895,7 @@ dependencies = [
"clock",
"collections",
"env_logger 0.11.5",
"fancy-regex 0.14.0",
"fs",
"futures 0.3.30",
"fuzzy",
@@ -12214,7 +12241,7 @@ dependencies = [
"anyhow",
"base64 0.21.7",
"bstr",
"fancy-regex",
"fancy-regex 0.12.0",
"lazy_static",
"parking_lot",
"rustc-hash 1.1.0",

View File

@@ -350,6 +350,7 @@ ec4rs = "1.1"
emojis = "0.6.1"
env_logger = "0.11"
exec = "0.3.1"
fancy-regex = "0.14.0"
fork = "0.2.0"
futures = "0.3"
futures-batch = "0.6.1"

View File

@@ -30,6 +30,7 @@ async-trait.workspace = true
client.workspace = true
clock.workspace = true
collections.workspace = true
fancy-regex.workspace = true
fs.workspace = true
futures.workspace = true
fuzzy.workspace = true

View File

@@ -1,6 +1,7 @@
use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
use anyhow::Result;
use client::proto;
use fancy_regex::Regex as FancyRegex;
use gpui::Model;
use language::{Buffer, BufferSnapshot};
use regex::{Captures, Regex, RegexBuilder};
@@ -54,26 +55,75 @@ impl SearchInputs {
&self.buffers
}
}
#[derive(Clone, Debug)]
pub enum SearchQuery {
Text {
search: Arc<AhoCorasick>,
replacement: Option<String>,
whole_word: bool,
case_sensitive: bool,
include_ignored: bool,
inner: SearchInputs,
},
Regex {
regex: Regex,
replacement: Option<String>,
multiline: bool,
whole_word: bool,
case_sensitive: bool,
include_ignored: bool,
inner: SearchInputs,
},
/// A compiled pattern held by one of the two regex engines this file imports.
///
/// Wrapping both engines in one type lets the search code call `detect`, `replace` and
/// `find_and_extend_matches` without caring which crate compiled the pattern.
#[derive(Clone, Debug)]
enum RegexEngine {
// Pattern compiled by the `regex` crate (`regex::Regex`).
Regex(Regex),
// Pattern compiled by the `fancy-regex` crate (imported here as `FancyRegex`).
FancyRegex(FancyRegex),
}
impl RegexEngine {
fn detect(&self, text: &str) -> Result<bool> {
match self {
Self::Regex(regex) => Ok(regex.find(text).is_some()),
Self::FancyRegex(fancy_regex) => Ok(fancy_regex.find(text)?.is_some()),
}
}
fn replace<'a>(&self, text: &'a str, replacement: &str) -> Cow<'a, str> {
match self {
Self::Regex(regex) => regex.replace(text, replacement),
Self::FancyRegex(fancy_regex) => fancy_regex.replace(text, replacement),
}
}
async fn find_and_extend_matches(
&self,
text: &str,
offset: usize,
matches: &mut Vec<Range<usize>>,
yield_interval: usize,
) {
match self {
Self::Regex(regex) => {
for (i, mat) in regex.find_iter(text).enumerate() {
if (i + 1) % yield_interval == 0 {
yield_now().await;
}
matches.push(mat.start() + offset..mat.end() + offset)
}
}
Self::FancyRegex(fancy_regex) => {
for (i, mat) in fancy_regex.find_iter(text).enumerate() {
if (i + 1) % yield_interval == 0 {
// REVIEW: revisit this yield interval and how it interacts with the outer
// line loop, etc...
yield_now().await;
}
if let Ok(mat) = mat {
matches.push(mat.start() + offset..mat.end() + offset)
} else {
// REVIEW: can consider ignoring or percolating up, or logging to see
// if this ever actually happens.
}
}
}
}
}
}
/// A search request: how to match (`method`) plus the options shared by every kind of
/// search.
#[derive(Clone, Debug)]
pub struct SearchQuery {
// Matching strategy: literal text or regex (see `SearchQueryMethod`).
method: SearchQueryMethod,
// Replacement text; the constructors visible here set it to `None` and
// `with_replacement` fills it in.
replacement: Option<String>,
// Whole-word option; forwarded as-is by `to_proto`.
pub whole_word: bool,
// Case-sensitivity option; forwarded as-is by `to_proto`.
pub case_sensitive: bool,
// The original query string together with the include/exclude matchers and buffers.
inner: SearchInputs,
// Include-ignored option; forwarded as-is by `to_proto`.
pub include_ignored: bool,
}
/// The matching strategy behind a `SearchQuery`.
// NOTE(review): this enum is `pub`, but `RegexEngine` is declared without `pub` — confirm
// the intended visibility of one or the other.
#[derive(Clone, Debug)]
pub enum SearchQueryMethod {
// Literal text search driven by a shared Aho-Corasick automaton.
Text { search: Arc<AhoCorasick> },
// Regex search. `multiline` is the flag given to the regex builder's `multi_line`; when
// set, the search runs over the whole text at once instead of line by line.
Regex { regex: RegexEngine, multiline: bool },
}
impl SearchQuery {
@@ -96,8 +146,10 @@ impl SearchQuery {
files_to_include,
buffers,
};
Ok(Self::Text {
search: Arc::new(search),
Ok(Self {
method: SearchQueryMethod::Text {
search: Arc::new(search),
},
replacement: None,
whole_word,
case_sensitive,
@@ -129,17 +181,24 @@ impl SearchQuery {
let regex = RegexBuilder::new(&query)
.case_insensitive(!case_sensitive)
.multi_line(multiline)
.build()?;
.build()
.map_err(|e| {
// REVIEW: Found by using: (?<!user|tenant)RecordId = randomUUID\(\);
log::error!("Failed to build regex: {}", e);
e
})?;
let inner = SearchInputs {
query: initial_query,
files_to_exclude,
files_to_include,
buffers,
};
Ok(Self::Regex {
regex,
Ok(Self {
method: SearchQueryMethod::Regex {
regex: RegexEngine::Regex(regex),
multiline,
},
replacement: None,
multiline,
whole_word,
case_sensitive,
include_ignored,
@@ -172,28 +231,17 @@ impl SearchQuery {
}
pub fn with_replacement(mut self, new_replacement: String) -> Self {
match self {
Self::Text {
ref mut replacement,
..
}
| Self::Regex {
ref mut replacement,
..
} => {
*replacement = Some(new_replacement);
self
}
}
self.replacement = Some(new_replacement);
self
}
pub fn to_proto(&self) -> proto::SearchQuery {
proto::SearchQuery {
query: self.as_str().to_string(),
regex: self.is_regex(),
whole_word: self.whole_word(),
case_sensitive: self.case_sensitive(),
include_ignored: self.include_ignored(),
whole_word: self.whole_word,
case_sensitive: self.case_sensitive,
include_ignored: self.include_ignored,
files_to_include: self.files_to_include().sources().join(","),
files_to_exclude: self.files_to_exclude().sources().join(","),
}
@@ -204,8 +252,8 @@ impl SearchQuery {
return Ok(false);
}
match self {
Self::Text { search, .. } => {
match &self.method {
SearchQueryMethod::Text { search, .. } => {
let mat = search.stream_find_iter(stream).next();
match mat {
Some(Ok(_)) => Ok(true),
@@ -213,7 +261,7 @@ impl SearchQuery {
None => Ok(false),
}
}
Self::Regex {
SearchQueryMethod::Regex {
regex, multiline, ..
} => {
let mut reader = BufReader::new(stream);
@@ -222,12 +270,12 @@ impl SearchQuery {
if let Err(err) = reader.read_to_string(&mut text) {
Err(err.into())
} else {
Ok(regex.find(&text).is_some())
regex.detect(&text)
}
} else {
for line in reader.lines() {
let line = line?;
if regex.find(&line).is_some() {
if regex.detect(&line)? {
return Ok(true);
}
}
@@ -238,20 +286,14 @@ impl SearchQuery {
}
/// Returns the replacement text for this `SearchQuery`.
pub fn replacement(&self) -> Option<&str> {
match self {
SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
replacement.as_deref()
}
}
self.replacement.as_deref()
}
/// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
match self {
SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
SearchQuery::Regex {
regex, replacement, ..
} => {
if let Some(replacement) = replacement {
match &self.method {
SearchQueryMethod::Text { .. } => self.replacement.clone().map(Cow::from),
SearchQueryMethod::Regex { regex, .. } => {
if let Some(ref replacement) = self.replacement {
let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX
.get_or_init(|| Regex::new(r"\\\\|\\n|\\t").unwrap())
.replace_all(replacement, |c: &Captures| {
@@ -262,7 +304,7 @@ impl SearchQuery {
x => unreachable!("Unexpected escape sequence: {}", x),
}
});
Some(regex.replace(text, replacement))
Some(regex.replace(text, &replacement))
} else {
None
}
@@ -288,11 +330,9 @@ impl SearchQuery {
buffer.as_rope().clone()
};
let mut matches = Vec::new();
match self {
Self::Text {
search, whole_word, ..
} => {
let mut matches: Vec<Range<usize>> = Vec::new();
match &self.method {
SearchQueryMethod::Text { search, .. } => {
for (ix, mat) in search
.stream_find_iter(rope.bytes_in_range(0..rope.len()))
.enumerate()
@@ -302,7 +342,7 @@ impl SearchQuery {
}
let mat = mat.unwrap();
if *whole_word {
if self.whole_word {
let classifier = buffer.char_classifier_at(range_offset + mat.start());
let prev_kind = rope
@@ -322,18 +362,14 @@ impl SearchQuery {
}
}
Self::Regex {
SearchQueryMethod::Regex {
regex, multiline, ..
} => {
if *multiline {
let text = rope.to_string();
for (ix, mat) in regex.find_iter(&text).enumerate() {
if (ix + 1) % YIELD_INTERVAL == 0 {
yield_now().await;
}
matches.push(mat.start()..mat.end());
}
regex
.find_and_extend_matches(&text, 0, &mut matches, YIELD_INTERVAL)
.await;
} else {
let mut line = String::new();
let mut line_offset = 0;
@@ -344,12 +380,14 @@ impl SearchQuery {
for (newline_ix, text) in chunk.split('\n').enumerate() {
if newline_ix > 0 {
for mat in regex.find_iter(&line) {
let start = line_offset + mat.start();
let end = line_offset + mat.end();
matches.push(start..end);
}
regex
.find_and_extend_matches(
&line,
line_offset,
&mut matches,
YIELD_INTERVAL,
)
.await;
line_offset += line.len() + 1;
line.clear();
}
@@ -371,33 +409,8 @@ impl SearchQuery {
self.as_inner().as_str()
}
pub fn whole_word(&self) -> bool {
match self {
Self::Text { whole_word, .. } => *whole_word,
Self::Regex { whole_word, .. } => *whole_word,
}
}
pub fn case_sensitive(&self) -> bool {
match self {
Self::Text { case_sensitive, .. } => *case_sensitive,
Self::Regex { case_sensitive, .. } => *case_sensitive,
}
}
pub fn include_ignored(&self) -> bool {
match self {
Self::Text {
include_ignored, ..
} => *include_ignored,
Self::Regex {
include_ignored, ..
} => *include_ignored,
}
}
pub fn is_regex(&self) -> bool {
matches!(self, Self::Regex { .. })
matches!(self.method, SearchQueryMethod::Regex { .. })
}
pub fn files_to_include(&self) -> &PathMatcher {
@@ -436,9 +449,7 @@ impl SearchQuery {
}
}
pub fn as_inner(&self) -> &SearchInputs {
match self {
Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
}
&self.inner
}
}