Compare commits


7 Commits

Author SHA1 Message Date
Julia Ryan
1caafceac0 Format 2025-11-06 14:16:15 -08:00
Julia Ryan
dc5e3fafc9 Fix a bunch of things
Co-authored-by: David Kleingeld <davidsk@zed.dev>
2025-11-06 14:10:56 -08:00
Julia Ryan
2b1e2571d0 Fix smart_case 2025-11-06 11:41:52 -08:00
Julia Ryan
9cbc6d174e Weight path matches differently
Co-authored-by: David Kleingeld <davidsk@zed.dev>
2025-11-06 11:41:52 -08:00
Julia Ryan
509686c428 wip 2025-11-06 11:41:52 -08:00
Julia Ryan
ef1624738d Add fuzzy path matching
Add path match test

Fix segment splitting

Co-authored-by: David Kleingeld <davidsk@zed.dev>

Add tests and tweak ordering

Co-authored-by: David Kleingeld <davidsk@zed.dev>

Fix penalize_length

Fix all the Ord impls

Co-authored-by: David Kleingeld <davidsk@zed.dev>

Fix file_finder ordering

Co-authored-by: David Kleingeld <davidsk@zed.dev>

Add tests
2025-11-06 11:41:51 -08:00
Julia Ryan
0f3993b92a Use nucleo for fuzzy string matching
Co-authored-by: David Kleingeld <davidsk@zed.dev>
2025-11-06 11:41:51 -08:00
12 changed files with 709 additions and 905 deletions
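For orientation, this branch swaps the fuzzy crate's hand-rolled scorer for nucleo's Pattern/Matcher API. A minimal standalone sketch of that flow, assuming nucleo 0.5 as pinned in Cargo.lock below (`needle` and `haystack` are illustrative names, not identifiers from the diff):

    use nucleo::pattern::{AtomKind, CaseMatching, Normalization, Pattern};

    /// Score one candidate against one query, returning the score and matched
    /// char offsets, roughly as match_strings/match_path_sets do per candidate.
    fn score_one(needle: &str, haystack: &str) -> Option<(u32, Vec<u32>)> {
        // Path-tuned matcher config, mirroring the `config.set_match_paths()` call below.
        let mut config = nucleo::Config::DEFAULT;
        config.set_match_paths();
        let mut matcher = nucleo::Matcher::new(config);

        // Smart-case, normalizing, fuzzy pattern, as built in match_strings and match_path_sets.
        let pattern = Pattern::new(needle, CaseMatching::Smart, Normalization::Smart, AtomKind::Fuzzy);

        let mut indices = Vec::new();
        let mut buf = Vec::new();
        // `indices` yields char offsets; the path-matching code later converts them to byte offsets.
        pattern
            .indices(nucleo::Utf32Str::new(haystack, &mut buf), &mut matcher, &mut indices)
            .map(|score| (score, indices))
    }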

Cargo.lock generated

@@ -6653,7 +6653,9 @@ version = "0.1.0"
dependencies = [
"gpui",
"log",
"nucleo",
"util",
"util_macros",
]
[[package]]
@@ -10466,6 +10468,27 @@ dependencies = [
"windows-sys 0.61.2",
]
[[package]]
name = "nucleo"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5262af4c94921c2646c5ac6ff7900c2af9cbb08dc26a797e18130a7019c039d4"
dependencies = [
"nucleo-matcher",
"parking_lot",
"rayon",
]
[[package]]
name = "nucleo-matcher"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf33f538733d1a5a3494b836ba913207f14d9d4a1d3cd67030c5061bdd2cac85"
dependencies = [
"memchr",
"unicode-segmentation",
]
[[package]]
name = "num"
version = "0.4.3"


@@ -547,6 +547,7 @@ naga = { version = "25.0", features = ["wgsl-in"] }
nanoid = "0.4"
nbformat = { git = "https://github.com/ConradIrwin/runtimed", rev = "7130c804216b6914355d15d0b91ea91f6babd734" }
nix = "0.29"
nucleo = "0.5"
num-format = "0.4.4"
num-traits = "0.2"
objc = "0.2"


@@ -42,15 +42,14 @@ async fn test_fuzzy_score(cx: &mut TestAppContext) {
CompletionBuilder::constant("ElementType", None, "7fffffff"),
];
let matches =
filter_and_sort_matches("Elem", &completions, SnippetSortOrder::default(), cx).await;
filter_and_sort_matches("elem", &completions, SnippetSortOrder::default(), cx).await;
assert_eq!(
matches
.iter()
.map(|m| m.string.as_str())
.collect::<Vec<_>>(),
vec!["ElementType", "element_type"]
vec!["element_type", "ElementType"]
);
assert!(matches[0].score > matches[1].score);
}
// fuzzy takes over sort_text and sort_kind
@@ -135,9 +134,6 @@ async fn test_sort_text(cx: &mut TestAppContext) {
// for each prefix, first item should always be one with lower sort_text
assert_eq!(matches[0].string, "unreachable!(…)");
assert_eq!(matches[1].string, "unreachable");
// fuzzy score should match for first two items as query is common prefix
assert_eq!(matches[0].score, matches[1].score);
})
.await;
@@ -146,9 +142,6 @@ async fn test_sort_text(cx: &mut TestAppContext) {
// exact match comes first
assert_eq!(matches[0].string, "unreachable");
assert_eq!(matches[1].string, "unreachable!(…)");
// fuzzy score should match for first two items as query is common prefix
assert_eq!(matches[0].score, matches[1].score);
}
}
@@ -325,12 +318,17 @@ async fn filter_and_sort_matches(
let matches = fuzzy::match_strings(
&candidates,
query,
query.chars().any(|c| c.is_uppercase()),
true,
false,
100,
&cancel_flag,
background_executor,
)
.await;
CompletionsMenu::sort_string_matches(matches, Some(query), snippet_sort_order, completions)
dbg!(CompletionsMenu::sort_string_matches(
matches,
Some(query),
snippet_sort_order,
completions
))
}


@@ -1060,7 +1060,7 @@ impl CompletionsMenu {
fuzzy::match_strings(
&match_candidates,
&query,
query.chars().any(|c| c.is_uppercase()),
true,
false,
1000,
&cancel_filter,
@@ -1170,7 +1170,7 @@ impl CompletionsMenu {
});
}
matches.sort_unstable_by_key(|string_match| {
let match_tier = |string_match: &StringMatch| {
let completion = &completions[string_match.candidate_id];
let is_snippet = matches!(
@@ -1186,9 +1186,7 @@ impl CompletionsMenu {
};
let (sort_kind, sort_label) = completion.sort_key();
let score = string_match.score;
let sort_score = Reverse(OrderedFloat(score));
let sort_score = Reverse(OrderedFloat(string_match.score.floor()));
let query_start_doesnt_match_split_words = query_start_lower
.map(|query_char| {
@@ -1225,7 +1223,11 @@ impl CompletionsMenu {
sort_label,
}
}
});
};
let tomato: Vec<_> = matches.iter().map(match_tier).collect();
dbg!(tomato);
matches.sort_unstable_by_key(|string_match| match_tier(string_match));
matches
}


@@ -337,8 +337,8 @@ impl FileFinder {
}
}
Match::Search(m) => ProjectPath {
worktree_id: WorktreeId::from_usize(m.0.worktree_id),
path: m.0.path.clone(),
worktree_id: WorktreeId::from_usize(m.worktree_id),
path: m.path.clone(),
},
Match::CreateNew(p) => p.clone(),
};
@@ -418,39 +418,6 @@ pub struct FileFinderDelegate {
include_ignored_refresh: Task<()>,
}
/// Use a custom ordering for file finder: the regular one
/// defines max element with the highest score and the latest alphanumerical path (in case of a tie on other params), e.g:
/// `[{score: 0.5, path = "c/d" }, { score: 0.5, path = "/a/b" }]`
///
/// In the file finder, we would prefer to have the max element with the highest score and the earliest alphanumerical path, e.g:
/// `[{ score: 0.5, path = "/a/b" }, {score: 0.5, path = "c/d" }]`
/// as the files are shown in the project panel lists.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ProjectPanelOrdMatch(PathMatch);
impl Ord for ProjectPanelOrdMatch {
fn cmp(&self, other: &Self) -> cmp::Ordering {
self.0
.score
.partial_cmp(&other.0.score)
.unwrap_or(cmp::Ordering::Equal)
.then_with(|| self.0.worktree_id.cmp(&other.0.worktree_id))
.then_with(|| {
other
.0
.distance_to_relative_ancestor
.cmp(&self.0.distance_to_relative_ancestor)
})
.then_with(|| self.0.path.cmp(&other.0.path).reverse())
}
}
impl PartialOrd for ProjectPanelOrdMatch {
fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
Some(self.cmp(other))
}
}
#[derive(Debug, Default)]
struct Matches {
separate_history: bool,
@@ -461,9 +428,9 @@ struct Matches {
enum Match {
History {
path: FoundPath,
panel_match: Option<ProjectPanelOrdMatch>,
panel_match: Option<PathMatch>,
},
Search(ProjectPanelOrdMatch),
Search(PathMatch),
CreateNew(ProjectPath),
}
@@ -471,7 +438,7 @@ impl Match {
fn relative_path(&self) -> Option<&Arc<RelPath>> {
match self {
Match::History { path, .. } => Some(&path.project.path),
Match::Search(panel_match) => Some(&panel_match.0.path),
Match::Search(panel_match) => Some(&panel_match.path),
Match::CreateNew(_) => None,
}
}
@@ -479,7 +446,7 @@ impl Match {
fn abs_path(&self, project: &Entity<Project>, cx: &App) -> Option<PathBuf> {
match self {
Match::History { path, .. } => Some(path.absolute.clone()),
Match::Search(ProjectPanelOrdMatch(path_match)) => Some(
Match::Search(path_match) => Some(
project
.read(cx)
.worktree_for_id(WorktreeId::from_usize(path_match.worktree_id), cx)?
@@ -490,7 +457,7 @@ impl Match {
}
}
fn panel_match(&self) -> Option<&ProjectPanelOrdMatch> {
fn panel_match(&self) -> Option<&PathMatch> {
match self {
Match::History { panel_match, .. } => panel_match.as_ref(),
Match::Search(panel_match) => Some(panel_match),
@@ -531,9 +498,7 @@ impl Matches {
.ok_or(0)
} else {
self.matches.binary_search_by(|m| {
// `reverse()` since if cmp_matches(a, b) == Ordering::Greater, then a is better than b.
// And we want the better entries go first.
Self::cmp_matches(self.separate_history, currently_opened, m, entry).reverse()
Self::cmp_matches(self.separate_history, currently_opened, m, entry)
})
}
}
@@ -545,7 +510,7 @@ impl Matches {
history_items: impl IntoIterator<Item = &'a FoundPath> + Clone,
currently_opened: Option<&'a FoundPath>,
query: Option<&FileSearchQuery>,
new_search_matches: impl Iterator<Item = ProjectPanelOrdMatch>,
new_search_matches: impl Iterator<Item = PathMatch>,
extend_old_matches: bool,
path_style: PathStyle,
) {
@@ -583,8 +548,8 @@ impl Matches {
let new_search_matches: Vec<Match> = new_search_matches
.filter(|path_match| {
!new_history_matches.contains_key(&ProjectPath {
path: path_match.0.path.clone(),
worktree_id: WorktreeId::from_usize(path_match.0.worktree_id),
path: path_match.path.clone(),
worktree_id: WorktreeId::from_usize(path_match.worktree_id),
})
})
.map(Match::Search)
@@ -604,7 +569,7 @@ impl Matches {
// It is possible that the new search matches' paths contain some of the old search matches' paths.
// History matches' paths are unique, since they are stored in a HashMap keyed by path.
// We build a sorted Vec<Match>, eliminating duplicate search matches.
// Search matches with the same paths should have equal `ProjectPanelOrdMatch`, so we should
// Search matches with the same paths should have equal `PathMatch`, so we should
// not have any duplicates after building the final list.
for new_match in new_history_matches
.into_values()
@@ -622,7 +587,6 @@ impl Matches {
}
}
/// If a < b, then a is a worse match, aligning with the `ProjectPanelOrdMatch` ordering.
fn cmp_matches(
separate_history: bool,
currently_opened: Option<&FoundPath>,
@@ -687,19 +651,19 @@ impl Matches {
}
/// Determines if the match occurred within the filename rather than in the path
fn is_filename_match(panel_match: &ProjectPanelOrdMatch) -> bool {
if panel_match.0.positions.is_empty() {
fn is_filename_match(panel_match: &PathMatch) -> bool {
if panel_match.positions.is_empty() {
return false;
}
if let Some(filename) = panel_match.0.path.file_name() {
let path_str = panel_match.0.path.as_unix_str();
if let Some(filename) = panel_match.path.file_name() {
let path_str = panel_match.path.as_unix_str();
if let Some(filename_pos) = path_str.rfind(filename)
&& panel_match.0.positions[0] >= filename_pos
&& panel_match.positions[0] >= filename_pos
{
let mut prev_position = panel_match.0.positions[0];
for p in &panel_match.0.positions[1..] {
let mut prev_position = panel_match.positions[0];
for p in &panel_match.positions[1..] {
if *p != prev_position + 1 {
return false;
}
@@ -783,7 +747,7 @@ fn matching_history_items<'a>(
project_path.clone(),
Match::History {
path: found_path.clone(),
panel_match: Some(ProjectPanelOrdMatch(path_match)),
panel_match: Some(path_match),
},
)
})
@@ -929,9 +893,7 @@ impl FileFinderDelegate {
&cancel_flag,
cx.background_executor().clone(),
)
.await
.into_iter()
.map(ProjectPanelOrdMatch);
.await;
let did_cancel = cancel_flag.load(atomic::Ordering::Acquire);
picker
.update(cx, |picker, cx| {
@@ -948,7 +910,7 @@ impl FileFinderDelegate {
search_id: usize,
did_cancel: bool,
query: FileSearchQuery,
matches: impl IntoIterator<Item = ProjectPanelOrdMatch>,
matches: impl IntoIterator<Item = PathMatch>,
cx: &mut Context<Picker<Self>>,
) {
if search_id >= self.latest_search_id {
@@ -1055,7 +1017,7 @@ impl FileFinderDelegate {
.filter(|worktree| worktree.read(cx).is_visible());
if let Some(panel_match) = panel_match {
self.labels_for_path_match(&panel_match.0, path_style)
self.labels_for_path_match(&panel_match, path_style)
} else if let Some(worktree) = worktree {
let full_path =
worktree.read(cx).root_name().join(&entry_path.project.path);
@@ -1082,7 +1044,7 @@ impl FileFinderDelegate {
)
}
}
Match::Search(path_match) => self.labels_for_path_match(&path_match.0, path_style),
Match::Search(path_match) => self.labels_for_path_match(&path_match, path_style),
Match::CreateNew(project_path) => (
format!("Create file: {}", project_path.path.display(path_style)),
vec![],
@@ -1244,7 +1206,7 @@ impl FileFinderDelegate {
if let Some((worktree, relative_path)) =
project.find_worktree(query_path, cx)
{
path_matches.push(ProjectPanelOrdMatch(PathMatch {
path_matches.push(PathMatch {
score: 1.0,
positions: Vec::new(),
worktree_id: worktree.read(cx).id().to_usize(),
@@ -1252,7 +1214,7 @@ impl FileFinderDelegate {
path_prefix: RelPath::empty().into(),
is_dir: false, // File finder doesn't support directories
distance_to_relative_ancestor: usize::MAX,
}));
});
}
})
.log_err();
@@ -1358,11 +1320,12 @@ impl PickerDelegate for FileFinderDelegate {
window: &mut Window,
cx: &mut Context<Picker<Self>>,
) -> Task<()> {
let raw_query = raw_query.replace(' ', "");
let raw_query = raw_query.trim();
let raw_query = match &raw_query.get(0..2) {
Some(".\\" | "./") => &raw_query[2..],
// git diff can add `a\`, `a/`, `b\` or `b/` in front of paths. Stripping these prefixes
// makes it easy to paste paths copied from a diff into the file finder.
Some(prefix @ ("a\\" | "a/" | "b\\" | "b/")) => {
if self
.workspace
@@ -1561,8 +1524,8 @@ impl PickerDelegate for FileFinderDelegate {
Match::Search(m) => split_or_open(
workspace,
ProjectPath {
worktree_id: WorktreeId::from_usize(m.0.worktree_id),
path: m.0.path.clone(),
worktree_id: WorktreeId::from_usize(m.worktree_id),
path: m.path.clone(),
},
window,
cx,
@@ -1831,7 +1794,7 @@ impl<'a> PathComponentSlice<'a> {
fn elision_range(&self, budget: usize, matches: &[usize]) -> Option<Range<usize>> {
let eligible_range = {
assert!(matches.windows(2).all(|w| w[0] <= w[1]));
assert!(matches.is_sorted());
let mut matches = matches.iter().copied().peekable();
let mut longest: Option<Range<usize>> = None;
let mut cur = 0..0;


@@ -74,7 +74,7 @@ fn test_path_elision() {
#[test]
fn test_custom_project_search_ordering_in_file_finder() {
let mut file_finder_sorted_output = vec![
ProjectPanelOrdMatch(PathMatch {
PathMatch {
score: 0.5,
positions: Vec::new(),
worktree_id: 0,
@@ -82,8 +82,8 @@ fn test_custom_project_search_ordering_in_file_finder() {
path_prefix: rel_path("").into(),
distance_to_relative_ancestor: 0,
is_dir: false,
}),
ProjectPanelOrdMatch(PathMatch {
},
PathMatch {
score: 1.0,
positions: Vec::new(),
worktree_id: 0,
@@ -91,8 +91,8 @@ fn test_custom_project_search_ordering_in_file_finder() {
path_prefix: rel_path("").into(),
distance_to_relative_ancestor: 0,
is_dir: false,
}),
ProjectPanelOrdMatch(PathMatch {
},
PathMatch {
score: 1.0,
positions: Vec::new(),
worktree_id: 0,
@@ -100,8 +100,8 @@ fn test_custom_project_search_ordering_in_file_finder() {
path_prefix: rel_path("").into(),
distance_to_relative_ancestor: 0,
is_dir: false,
}),
ProjectPanelOrdMatch(PathMatch {
},
PathMatch {
score: 0.5,
positions: Vec::new(),
worktree_id: 0,
@@ -109,8 +109,8 @@ fn test_custom_project_search_ordering_in_file_finder() {
path_prefix: rel_path("").into(),
distance_to_relative_ancestor: 0,
is_dir: false,
}),
ProjectPanelOrdMatch(PathMatch {
},
PathMatch {
score: 1.0,
positions: Vec::new(),
worktree_id: 0,
@@ -118,14 +118,14 @@ fn test_custom_project_search_ordering_in_file_finder() {
path_prefix: rel_path("").into(),
distance_to_relative_ancestor: 0,
is_dir: false,
}),
},
];
file_finder_sorted_output.sort_by(|a, b| b.cmp(a));
assert_eq!(
file_finder_sorted_output,
vec![
ProjectPanelOrdMatch(PathMatch {
PathMatch {
score: 1.0,
positions: Vec::new(),
worktree_id: 0,
@@ -133,8 +133,8 @@ fn test_custom_project_search_ordering_in_file_finder() {
path_prefix: rel_path("").into(),
distance_to_relative_ancestor: 0,
is_dir: false,
}),
ProjectPanelOrdMatch(PathMatch {
},
PathMatch {
score: 1.0,
positions: Vec::new(),
worktree_id: 0,
@@ -142,8 +142,8 @@ fn test_custom_project_search_ordering_in_file_finder() {
path_prefix: rel_path("").into(),
distance_to_relative_ancestor: 0,
is_dir: false,
}),
ProjectPanelOrdMatch(PathMatch {
},
PathMatch {
score: 1.0,
positions: Vec::new(),
worktree_id: 0,
@@ -151,8 +151,8 @@ fn test_custom_project_search_ordering_in_file_finder() {
path_prefix: rel_path("").into(),
distance_to_relative_ancestor: 0,
is_dir: false,
}),
ProjectPanelOrdMatch(PathMatch {
},
PathMatch {
score: 0.5,
positions: Vec::new(),
worktree_id: 0,
@@ -160,8 +160,8 @@ fn test_custom_project_search_ordering_in_file_finder() {
path_prefix: rel_path("").into(),
distance_to_relative_ancestor: 0,
is_dir: false,
}),
ProjectPanelOrdMatch(PathMatch {
},
PathMatch {
score: 0.5,
positions: Vec::new(),
worktree_id: 0,
@@ -169,7 +169,7 @@ fn test_custom_project_search_ordering_in_file_finder() {
path_prefix: rel_path("").into(),
distance_to_relative_ancestor: 0,
is_dir: false,
}),
},
]
);
}
@@ -210,9 +210,9 @@ async fn test_matching_paths(cx: &mut TestAppContext) {
"bandana",
"./bandana",
".\\bandana",
util::path!("a/bandana"),
"b/bandana",
"b\\bandana",
util::path!("a/bandana"), // 'b/' or 'b\` get trimmed off.
"b/bandana", // 'b/' gets trimmed off.
"b\\bandana", // 'b\' gets trimmed off.
" bandana",
"bandana ",
" bandana ",
@@ -629,10 +629,7 @@ async fn test_matching_cancellation(cx: &mut TestAppContext) {
delegate.latest_search_id,
true, // did-cancel
query.clone(),
vec![
ProjectPanelOrdMatch(matches[1].clone()),
ProjectPanelOrdMatch(matches[3].clone()),
],
vec![matches[1].clone(), matches[3].clone()],
cx,
);
@@ -642,11 +639,7 @@ async fn test_matching_cancellation(cx: &mut TestAppContext) {
delegate.latest_search_id,
true, // did-cancel
query.clone(),
vec![
ProjectPanelOrdMatch(matches[0].clone()),
ProjectPanelOrdMatch(matches[2].clone()),
ProjectPanelOrdMatch(matches[3].clone()),
],
vec![matches[0].clone(), matches[2].clone(), matches[3].clone()],
cx,
);
@@ -1170,11 +1163,8 @@ async fn test_history_items_uniqueness_for_multiple_worktree(cx: &mut TestAppCon
}
if let Match::Search(path_match) = &matches[1] {
assert_eq!(
WorktreeId::from_usize(path_match.0.worktree_id),
worktree_id2
);
assert_eq!(path_match.0.path.as_ref(), rel_path("package.json"));
assert_eq!(WorktreeId::from_usize(path_match.worktree_id), worktree_id2);
assert_eq!(path_match.path.as_ref(), rel_path("package.json"));
}
});
}
@@ -1585,10 +1575,10 @@ async fn test_history_match_positions(cx: &mut gpui::TestAppContext) {
[Match::History { .. }, Match::CreateNew { .. }]
);
assert_eq!(
matches[0].panel_match().unwrap().0.path.as_ref(),
matches[0].panel_match().unwrap().path.as_ref(),
rel_path("test/first.rs")
);
assert_eq!(matches[0].panel_match().unwrap().0.positions, &[5, 6, 7]);
assert_eq!(matches[0].panel_match().unwrap().positions, &[5, 6, 7]);
let (file_label, path_label) =
finder
@@ -2764,10 +2754,10 @@ async fn test_history_items_uniqueness_for_multiple_worktree_open_all_files(
assert_eq!(path.project.worktree_id, worktree_id2);
assert_eq!(path.project.path.as_ref(), rel_path("package.json"));
let panel_match = panel_match.as_ref().unwrap();
assert_eq!(panel_match.0.path_prefix, rel_path("repo2").into());
assert_eq!(panel_match.0.path, rel_path("package.json").into());
assert_eq!(panel_match.path_prefix, rel_path("repo2").into());
assert_eq!(panel_match.path, rel_path("package.json").into());
assert_eq!(
panel_match.0.positions,
panel_match.positions,
vec![6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]
);
}
@@ -2776,10 +2766,10 @@ async fn test_history_items_uniqueness_for_multiple_worktree_open_all_files(
assert_eq!(path.project.worktree_id, worktree_id1);
assert_eq!(path.project.path.as_ref(), rel_path("package.json"));
let panel_match = panel_match.as_ref().unwrap();
assert_eq!(panel_match.0.path_prefix, rel_path("repo1").into());
assert_eq!(panel_match.0.path, rel_path("package.json").into());
assert_eq!(panel_match.path_prefix, rel_path("repo1").into());
assert_eq!(panel_match.path, rel_path("package.json").into());
assert_eq!(
panel_match.0.positions,
panel_match.positions,
vec![6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]
);
}
@@ -3309,7 +3299,7 @@ fn collect_search_matches(picker: &Picker<FileFinderDelegate>) -> SearchEntries
if let Some(path_match) = path_match.as_ref() {
search_entries
.history
.push(path_match.0.path_prefix.join(&path_match.0.path));
.push(path_match.path_prefix.join(&path_match.path));
} else {
// This occurs when the query is empty and we show history matches
// that are outside the project.
@@ -3322,8 +3312,8 @@ fn collect_search_matches(picker: &Picker<FileFinderDelegate>) -> SearchEntries
Match::Search(path_match) => {
search_entries
.search
.push(path_match.0.path_prefix.join(&path_match.0.path));
search_entries.search_matches.push(path_match.0.clone());
.push(path_match.path_prefix.join(&path_match.path));
search_entries.search_matches.push(path_match.clone());
}
Match::CreateNew(_) => {}
}
@@ -3358,7 +3348,7 @@ fn assert_match_at_position(
.unwrap_or_else(|| panic!("Finder has no match for index {match_index}"));
let match_file_name = match &match_item {
Match::History { path, .. } => path.absolute.file_name().and_then(|s| s.to_str()),
Match::Search(path_match) => path_match.0.path.file_name(),
Match::Search(path_match) => path_match.path.file_name(),
Match::CreateNew(project_path) => project_path.path.file_name(),
}
.unwrap();


@@ -16,6 +16,9 @@ doctest = false
gpui.workspace = true
util.workspace = true
log.workspace = true
nucleo.workspace = true
[dev-dependencies]
util = {workspace = true, features = ["test-support"]}
gpui = { workspace = true, "features" = ["test-support"] }
util_macros.workspace = true


@@ -1,599 +1,35 @@
use std::{
borrow::Borrow,
collections::BTreeMap,
sync::atomic::{self, AtomicBool},
};
use std::sync::Mutex;
use crate::CharBag;
const BASE_DISTANCE_PENALTY: f64 = 0.6;
const ADDITIONAL_DISTANCE_PENALTY: f64 = 0.05;
const MIN_DISTANCE_PENALTY: f64 = 0.2;
// TODO:
// Use `Path` instead of `&str` for paths.
pub struct Matcher<'a> {
query: &'a [char],
lowercase_query: &'a [char],
query_char_bag: CharBag,
smart_case: bool,
penalize_length: bool,
min_score: f64,
match_positions: Vec<usize>,
last_positions: Vec<usize>,
score_matrix: Vec<Option<f64>>,
best_position_matrix: Vec<usize>,
pub static MATCHERS: Mutex<Vec<nucleo::Matcher>> = Mutex::new(Vec::new());
pub fn get_matcher(config: nucleo::Config) -> nucleo::Matcher {
let mut matchers = MATCHERS.lock().unwrap();
let mut matcher = matchers
.pop()
.unwrap_or_else(|| nucleo::Matcher::new(config.clone()));
matcher.config = config;
matcher
}
pub trait MatchCandidate {
fn has_chars(&self, bag: CharBag) -> bool;
fn candidate_chars(&self) -> impl Iterator<Item = char>;
pub fn return_matcher(matcher: nucleo::Matcher) {
MATCHERS.lock().unwrap().push(matcher);
}
impl<'a> Matcher<'a> {
pub fn new(
query: &'a [char],
lowercase_query: &'a [char],
query_char_bag: CharBag,
smart_case: bool,
penalize_length: bool,
) -> Self {
Self {
query,
lowercase_query,
query_char_bag,
min_score: 0.0,
last_positions: vec![0; lowercase_query.len()],
match_positions: vec![0; query.len()],
score_matrix: Vec::new(),
best_position_matrix: Vec::new(),
smart_case,
penalize_length,
}
}
/// Filter and score fuzzy match candidates. Results are returned unsorted, in the same order as
/// the input candidates.
pub(crate) fn match_candidates<C, R, F, T>(
&mut self,
prefix: &[char],
lowercase_prefix: &[char],
candidates: impl Iterator<Item = T>,
results: &mut Vec<R>,
cancel_flag: &AtomicBool,
build_match: F,
) where
C: MatchCandidate,
T: Borrow<C>,
F: Fn(&C, f64, &Vec<usize>) -> R,
{
let mut candidate_chars = Vec::new();
let mut lowercase_candidate_chars = Vec::new();
let mut extra_lowercase_chars = BTreeMap::new();
for candidate in candidates {
if !candidate.borrow().has_chars(self.query_char_bag) {
continue;
}
if cancel_flag.load(atomic::Ordering::Acquire) {
break;
}
candidate_chars.clear();
lowercase_candidate_chars.clear();
extra_lowercase_chars.clear();
for (i, c) in candidate.borrow().candidate_chars().enumerate() {
candidate_chars.push(c);
let mut char_lowercased = c.to_lowercase().collect::<Vec<_>>();
if char_lowercased.len() > 1 {
extra_lowercase_chars.insert(i, char_lowercased.len() - 1);
}
lowercase_candidate_chars.append(&mut char_lowercased);
}
if !self.find_last_positions(lowercase_prefix, &lowercase_candidate_chars) {
continue;
}
let matrix_len = self.query.len() * (prefix.len() + candidate_chars.len());
self.score_matrix.clear();
self.score_matrix.resize(matrix_len, None);
self.best_position_matrix.clear();
self.best_position_matrix.resize(matrix_len, 0);
let score = self.score_match(
&candidate_chars,
&lowercase_candidate_chars,
prefix,
lowercase_prefix,
&extra_lowercase_chars,
);
if score > 0.0 {
results.push(build_match(
candidate.borrow(),
score,
&self.match_positions,
));
}
}
}
fn find_last_positions(
&mut self,
lowercase_prefix: &[char],
lowercase_candidate: &[char],
) -> bool {
let mut lowercase_prefix = lowercase_prefix.iter();
let mut lowercase_candidate = lowercase_candidate.iter();
for (i, char) in self.lowercase_query.iter().enumerate().rev() {
if let Some(j) = lowercase_candidate.rposition(|c| c == char) {
self.last_positions[i] = j + lowercase_prefix.len();
} else if let Some(j) = lowercase_prefix.rposition(|c| c == char) {
self.last_positions[i] = j;
} else {
return false;
}
}
true
}
fn score_match(
&mut self,
path: &[char],
path_lowercased: &[char],
prefix: &[char],
lowercase_prefix: &[char],
extra_lowercase_chars: &BTreeMap<usize, usize>,
) -> f64 {
let score = self.recursive_score_match(
path,
path_lowercased,
prefix,
lowercase_prefix,
0,
0,
self.query.len() as f64,
extra_lowercase_chars,
) * self.query.len() as f64;
if score <= 0.0 {
return 0.0;
}
let path_len = prefix.len() + path.len();
let mut cur_start = 0;
let mut byte_ix = 0;
let mut char_ix = 0;
for i in 0..self.query.len() {
let match_char_ix = self.best_position_matrix[i * path_len + cur_start];
while char_ix < match_char_ix {
let ch = prefix
.get(char_ix)
.or_else(|| path.get(char_ix - prefix.len()))
.unwrap();
byte_ix += ch.len_utf8();
char_ix += 1;
}
self.match_positions[i] = byte_ix;
let matched_ch = prefix
.get(match_char_ix)
.or_else(|| path.get(match_char_ix - prefix.len()))
.unwrap();
byte_ix += matched_ch.len_utf8();
cur_start = match_char_ix + 1;
char_ix = match_char_ix + 1;
}
score
}
fn recursive_score_match(
&mut self,
path: &[char],
path_lowercased: &[char],
prefix: &[char],
lowercase_prefix: &[char],
query_idx: usize,
path_idx: usize,
cur_score: f64,
extra_lowercase_chars: &BTreeMap<usize, usize>,
) -> f64 {
if query_idx == self.query.len() {
return 1.0;
}
let limit = self.last_positions[query_idx];
let max_valid_index = (prefix.len() + path_lowercased.len()).saturating_sub(1);
let safe_limit = limit.min(max_valid_index);
if path_idx > safe_limit {
return 0.0;
}
let path_len = prefix.len() + path.len();
if let Some(memoized) = self.score_matrix[query_idx * path_len + path_idx] {
return memoized;
}
let mut score = 0.0;
let mut best_position = 0;
let query_char = self.lowercase_query[query_idx];
let mut last_slash = 0;
for j in path_idx..=safe_limit {
let extra_lowercase_chars_count = extra_lowercase_chars
.iter()
.take_while(|&(&i, _)| i < j)
.map(|(_, increment)| increment)
.sum::<usize>();
let j_regular = j - extra_lowercase_chars_count;
let path_char = if j < prefix.len() {
lowercase_prefix[j]
} else {
let path_index = j - prefix.len();
match path_lowercased.get(path_index) {
Some(&char) => char,
None => continue,
}
};
let is_path_sep = path_char == '/';
if query_idx == 0 && is_path_sep {
last_slash = j_regular;
}
let need_to_score = query_char == path_char || (is_path_sep && query_char == '_');
if need_to_score {
let curr = match prefix.get(j_regular) {
Some(&curr) => curr,
None => path[j_regular - prefix.len()],
};
let mut char_score = 1.0;
if j > path_idx {
let last = match prefix.get(j_regular - 1) {
Some(&last) => last,
None => path[j_regular - 1 - prefix.len()],
};
if last == '/' {
char_score = 0.9;
} else if (last == '-' || last == '_' || last == ' ' || last.is_numeric())
|| (last.is_lowercase() && curr.is_uppercase())
{
char_score = 0.8;
} else if last == '.' {
char_score = 0.7;
} else if query_idx == 0 {
char_score = BASE_DISTANCE_PENALTY;
} else {
char_score = MIN_DISTANCE_PENALTY.max(
BASE_DISTANCE_PENALTY
- (j - path_idx - 1) as f64 * ADDITIONAL_DISTANCE_PENALTY,
);
}
}
// Apply a severe penalty if the case doesn't match.
// This will make the exact matches have higher score than the case-insensitive and the
// path insensitive matches.
if (self.smart_case || curr == '/') && self.query[query_idx] != curr {
char_score *= 0.001;
}
let mut multiplier = char_score;
// Scale the score based on how deep within the path we found the match.
if self.penalize_length && query_idx == 0 {
multiplier /= ((prefix.len() + path.len()) - last_slash) as f64;
}
let mut next_score = 1.0;
if self.min_score > 0.0 {
next_score = cur_score * multiplier;
// Scores only decrease. If we can't pass the previous best, bail
if next_score < self.min_score {
// Ensure that score is non-zero so we use it in the memo table.
if score == 0.0 {
score = 1e-18;
}
continue;
}
}
let new_score = self.recursive_score_match(
path,
path_lowercased,
prefix,
lowercase_prefix,
query_idx + 1,
j + 1,
next_score,
extra_lowercase_chars,
) * multiplier;
if new_score > score {
score = new_score;
best_position = j_regular;
// Optimization: can't score better than 1.
if new_score == 1.0 {
break;
}
}
}
}
if best_position != 0 {
self.best_position_matrix[query_idx * path_len + path_idx] = best_position;
}
self.score_matrix[query_idx * path_len + path_idx] = Some(score);
score
}
}
#[cfg(test)]
mod tests {
use util::rel_path::{RelPath, rel_path};
use crate::{PathMatch, PathMatchCandidate};
use super::*;
use std::sync::Arc;
#[test]
fn test_get_last_positions() {
let mut query: &[char] = &['d', 'c'];
let mut matcher = Matcher::new(query, query, query.into(), false, true);
let result = matcher.find_last_positions(&['a', 'b', 'c'], &['b', 'd', 'e', 'f']);
assert!(!result);
query = &['c', 'd'];
let mut matcher = Matcher::new(query, query, query.into(), false, true);
let result = matcher.find_last_positions(&['a', 'b', 'c'], &['b', 'd', 'e', 'f']);
assert!(result);
assert_eq!(matcher.last_positions, vec![2, 4]);
query = &['z', '/', 'z', 'f'];
let mut matcher = Matcher::new(query, query, query.into(), false, true);
let result = matcher.find_last_positions(&['z', 'e', 'd', '/'], &['z', 'e', 'd', '/', 'f']);
assert!(result);
assert_eq!(matcher.last_positions, vec![0, 3, 4, 8]);
}
#[test]
fn test_match_path_entries() {
let paths = vec![
"",
"a",
"ab",
"abC",
"abcd",
"alphabravocharlie",
"AlphaBravoCharlie",
"thisisatestdir",
"ThisIsATestDir",
"this/is/a/test/dir",
"test/tiatd",
];
assert_eq!(
match_single_path_query("abc", false, &paths),
vec![
("abC", vec![0, 1, 2]),
("abcd", vec![0, 1, 2]),
("AlphaBravoCharlie", vec![0, 5, 10]),
("alphabravocharlie", vec![4, 5, 10]),
]
);
assert_eq!(
match_single_path_query("t/i/a/t/d", false, &paths),
vec![("this/is/a/test/dir", vec![0, 4, 5, 7, 8, 9, 10, 14, 15]),]
);
assert_eq!(
match_single_path_query("tiatd", false, &paths),
vec![
("test/tiatd", vec![5, 6, 7, 8, 9]),
("ThisIsATestDir", vec![0, 4, 6, 7, 11]),
("this/is/a/test/dir", vec![0, 5, 8, 10, 15]),
("thisisatestdir", vec![0, 2, 6, 7, 11]),
]
);
}
#[test]
fn test_lowercase_longer_than_uppercase() {
// This character has more chars in lower-case than in upper-case.
let paths = vec!["\u{0130}"];
let query = "\u{0130}";
assert_eq!(
match_single_path_query(query, false, &paths),
vec![("\u{0130}", vec![0])]
);
// Path is the lower-case version of the query
let paths = vec!["i\u{307}"];
let query = "\u{0130}";
assert_eq!(
match_single_path_query(query, false, &paths),
vec![("i\u{307}", vec![0])]
);
}
#[test]
fn test_match_multibyte_path_entries() {
let paths = vec![
"aαbβ/cγ",
"αβγδ/bcde",
"c1⃣2⃣3⃣/d4⃣5⃣6⃣/e7⃣8⃣9⃣/f",
"d/🆒/h",
];
assert_eq!("1".len(), 7);
assert_eq!(
match_single_path_query("bcd", false, &paths),
vec![
("αβγδ/bcde", vec![9, 10, 11]),
("aαbβ/cγ", vec![3, 7, 10]),
]
);
assert_eq!(
match_single_path_query("cde", false, &paths),
vec![
("αβγδ/bcde", vec![10, 11, 12]),
("c1⃣2⃣3⃣/d4⃣5⃣6⃣/e7⃣8⃣9⃣/f", vec![0, 23, 46]),
]
);
}
#[test]
fn match_unicode_path_entries() {
let mixed_unicode_paths = vec![
"İolu/oluş",
"İstanbul/code",
"Athens/Şanlıurfa",
"Çanakkale/scripts",
"paris/Düzce_İl",
"Berlin_Önemli_Ğündem",
"KİTAPLIK/london/dosya",
"tokyo/kyoto/fuji",
"new_york/san_francisco",
];
assert_eq!(
match_single_path_query("İo/oluş", false, &mixed_unicode_paths),
vec![("İolu/oluş", vec![0, 2, 4, 6, 8, 10, 12])]
);
assert_eq!(
match_single_path_query("İst/code", false, &mixed_unicode_paths),
vec![("İstanbul/code", vec![0, 2, 4, 6, 8, 10, 12, 14])]
);
assert_eq!(
match_single_path_query("athens/şa", false, &mixed_unicode_paths),
vec![("Athens/Şanlıurfa", vec![0, 1, 2, 3, 4, 5, 6, 7, 9])]
);
assert_eq!(
match_single_path_query("BerlinÖĞ", false, &mixed_unicode_paths),
vec![("Berlin_Önemli_Ğündem", vec![0, 1, 2, 3, 4, 5, 7, 15])]
);
assert_eq!(
match_single_path_query("tokyo/fuji", false, &mixed_unicode_paths),
vec![("tokyo/kyoto/fuji", vec![0, 1, 2, 3, 4, 5, 12, 13, 14, 15])]
);
let mixed_script_paths = vec![
"résumé_Москва",
"naïve_київ_implementation",
"café_北京_app",
"東京_über_driver",
"déjà_vu_cairo",
"seoul_piñata_game",
"voilà_istanbul_result",
];
assert_eq!(
match_single_path_query("résmé", false, &mixed_script_paths),
vec![("résumé_Москва", vec![0, 1, 3, 5, 6])]
);
assert_eq!(
match_single_path_query("café北京", false, &mixed_script_paths),
vec![("café_北京_app", vec![0, 1, 2, 3, 6, 9])]
);
assert_eq!(
match_single_path_query("ista", false, &mixed_script_paths),
vec![("voilà_istanbul_result", vec![7, 8, 9, 10])]
);
let complex_paths = vec![
"document_📚_library",
"project_👨👩👧👦_family",
"flags_🇯🇵🇺🇸🇪🇺_world",
"code_😀😃😄😁_happy",
"photo_👩👩👧👦_album",
];
assert_eq!(
match_single_path_query("doc📚lib", false, &complex_paths),
vec![("document_📚_library", vec![0, 1, 2, 9, 14, 15, 16])]
);
assert_eq!(
match_single_path_query("codehappy", false, &complex_paths),
vec![("code_😀😃😄😁_happy", vec![0, 1, 2, 3, 22, 23, 24, 25, 26])]
);
}
fn match_single_path_query<'a>(
query: &str,
smart_case: bool,
paths: &[&'a str],
) -> Vec<(&'a str, Vec<usize>)> {
let lowercase_query = query.to_lowercase().chars().collect::<Vec<_>>();
let query = query.chars().collect::<Vec<_>>();
let query_chars = CharBag::from(&lowercase_query[..]);
let path_arcs: Vec<Arc<RelPath>> = paths
.iter()
.map(|path| Arc::from(rel_path(path)))
.collect::<Vec<_>>();
let mut path_entries = Vec::new();
for (i, path) in paths.iter().enumerate() {
let lowercase_path = path.to_lowercase().chars().collect::<Vec<_>>();
let char_bag = CharBag::from(lowercase_path.as_slice());
path_entries.push(PathMatchCandidate {
is_dir: false,
char_bag,
path: &path_arcs[i],
});
}
let mut matcher = Matcher::new(&query, &lowercase_query, query_chars, smart_case, true);
let cancel_flag = AtomicBool::new(false);
let mut results = Vec::new();
matcher.match_candidates(
&[],
&[],
path_entries.into_iter(),
&mut results,
&cancel_flag,
|candidate, score, positions| PathMatch {
score,
worktree_id: 0,
positions: positions.clone(),
path: candidate.path.into(),
path_prefix: RelPath::empty().into(),
distance_to_relative_ancestor: usize::MAX,
is_dir: false,
},
);
results.sort_by(|a, b| b.cmp(a));
results
.into_iter()
.map(|result| {
(
paths
.iter()
.copied()
.find(|p| result.path.as_ref() == rel_path(p))
.unwrap(),
result.positions,
)
pub fn get_matchers(n: usize, config: nucleo::Config) -> Vec<nucleo::Matcher> {
let mut matchers: Vec<_> = {
let mut matchers = MATCHERS.lock().unwrap();
let num_matchers = matchers.len();
matchers
.drain(0..std::cmp::min(n, num_matchers))
.map(|mut matcher| {
matcher.config = config.clone();
matcher
})
.collect()
}
};
matchers.resize_with(n, || nucleo::Matcher::new(config.clone()));
matchers
}
pub fn return_matchers(mut matchers: Vec<nucleo::Matcher>) {
MATCHERS.lock().unwrap().append(&mut matchers);
}
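The pool above lets each parallel search segment reuse a matcher instead of allocating a fresh one per query. A condensed sketch of the checkout/return pattern used by match_strings and match_path_sets below, assuming the functions above are in scope (the segment count of 4 is illustrative):

    fn run_parallel_search_sketch() {
        let mut config = nucleo::Config::DEFAULT;
        config.prefer_prefix = true; // as match_strings configures it
        let mut matchers = get_matchers(4, config); // one matcher per worker segment
        for matcher in matchers.iter_mut() {
            // ... score this segment's candidates with `pattern.indices(..., matcher, ...)` ...
        }
        return_matchers(matchers); // recycle the allocations for the next search
    }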


@@ -1,4 +1,5 @@
use gpui::BackgroundExecutor;
use nucleo::pattern::{AtomKind, CaseMatching, Normalization, Pattern};
use std::{
cmp::{self, Ordering},
sync::{
@@ -8,10 +9,7 @@ use std::{
};
use util::{paths::PathStyle, rel_path::RelPath};
use crate::{
CharBag,
matcher::{MatchCandidate, Matcher},
};
use crate::{CharBag, matcher};
#[derive(Clone, Debug)]
pub struct PathMatchCandidate<'a> {
@@ -23,6 +21,7 @@ pub struct PathMatchCandidate<'a> {
#[derive(Clone, Debug)]
pub struct PathMatch {
pub score: f64,
/// Guaranteed to be sorted, chars from the start of the string
pub positions: Vec<usize>,
pub worktree_id: usize,
pub path: Arc<RelPath>,
@@ -33,6 +32,8 @@ pub struct PathMatch {
pub distance_to_relative_ancestor: usize,
}
// This has only one implementation. It's here to invert dependencies so fuzzy
// does not need to depend on project. Though we also use it to make testing easier.
pub trait PathMatchCandidateSet<'a>: Send + Sync {
type Candidates: Iterator<Item = PathMatchCandidate<'a>>;
fn id(&self) -> usize;
@@ -46,16 +47,6 @@ pub trait PathMatchCandidateSet<'a>: Send + Sync {
fn path_style(&self) -> PathStyle;
}
impl<'a> MatchCandidate for PathMatchCandidate<'a> {
fn has_chars(&self, bag: CharBag) -> bool {
self.char_bag.is_superset(bag)
}
fn candidate_chars(&self) -> impl Iterator<Item = char> {
self.path.as_unix_str().chars()
}
}
impl PartialEq for PathMatch {
fn eq(&self, other: &Self) -> bool {
self.cmp(other).is_eq()
@@ -72,19 +63,48 @@ impl PartialOrd for PathMatch {
impl Ord for PathMatch {
fn cmp(&self, other: &Self) -> Ordering {
println!(
"{:?}: {}, {:?} {}",
self.path, self.score, other.path, other.score
);
self.score
.partial_cmp(&other.score)
.unwrap_or(Ordering::Equal)
.total_cmp(&other.score)
.reverse()
.then_with(|| self.worktree_id.cmp(&other.worktree_id))
.then_with(|| {
other
.distance_to_relative_ancestor
.cmp(&self.distance_to_relative_ancestor)
})
.then_with(|| {
self.distance_from_end()
.total_cmp(&other.distance_from_end())
})
// see shorter_over_lexicographical test for an example of why we want this
.then_with(|| {
self.path
.as_unix_str()
.chars()
.count()
.cmp(&other.path.as_unix_str().chars().count())
})
.then_with(|| self.path.cmp(&other.path))
}
}
impl PathMatch {
fn distance_from_end(&self) -> f32 {
let len = self.path_prefix.as_unix_str().chars().count()
+ 1
+ self.path.as_unix_str().chars().count(); // add one for path separator
dbg!(&self.path, &self.path_prefix);
self.positions
.iter()
.map(|p| (dbg!(len) - dbg!(p)) as f32 / 1000.0)
.sum()
}
}
pub fn match_fixed_path_set(
candidates: Vec<PathMatchCandidate>,
worktree_id: usize,
@@ -94,54 +114,106 @@ pub fn match_fixed_path_set(
max_results: usize,
path_style: PathStyle,
) -> Vec<PathMatch> {
let lowercase_query = query.to_lowercase().chars().collect::<Vec<_>>();
let query = query.chars().collect::<Vec<_>>();
let query_char_bag = CharBag::from(&lowercase_query[..]);
let mut matcher = Matcher::new(&query, &lowercase_query, query_char_bag, smart_case, true);
let mut config = nucleo::Config::DEFAULT;
config.set_match_paths();
let mut matcher = matcher::get_matcher(config);
let pattern = Pattern::new(
query,
if smart_case {
CaseMatching::Smart
} else {
CaseMatching::Ignore
},
Normalization::Smart,
AtomKind::Fuzzy,
);
let mut results = Vec::with_capacity(candidates.len());
let (path_prefix, path_prefix_chars, lowercase_prefix) = match worktree_root_name {
Some(worktree_root_name) => {
let mut path_prefix_chars = worktree_root_name
.display(path_style)
.chars()
.collect::<Vec<_>>();
path_prefix_chars.extend(path_style.separator().chars());
let lowercase_pfx = path_prefix_chars
.iter()
.map(|c| c.to_ascii_lowercase())
.collect::<Vec<_>>();
(worktree_root_name, path_prefix_chars, lowercase_pfx)
}
None => (
RelPath::empty().into(),
Default::default(),
Default::default(),
),
};
matcher.match_candidates(
&path_prefix_chars,
&lowercase_prefix,
path_match_helper(
&mut matcher,
&pattern,
candidates.into_iter(),
&mut results,
worktree_id,
&worktree_root_name
.clone()
.unwrap_or(RelPath::empty().into()),
&None,
path_style,
&AtomicBool::new(false),
|candidate, score, positions| PathMatch {
score,
worktree_id,
positions: positions.clone(),
is_dir: candidate.is_dir,
path: candidate.path.into(),
path_prefix: path_prefix.clone(),
distance_to_relative_ancestor: usize::MAX,
},
);
util::truncate_to_bottom_n_sorted_by(&mut results, max_results, &|a, b| b.cmp(a));
&mut results,
)
.ok();
matcher::return_matcher(matcher);
util::truncate_to_bottom_n_sorted(&mut results, max_results);
for r in &mut results {
r.positions.sort();
}
results
}
struct Cancelled;
fn path_match_helper<'a>(
matcher: &mut nucleo::Matcher,
pattern: &Pattern,
candidates: impl Iterator<Item = PathMatchCandidate<'a>>,
worktree_id: usize,
path_prefix: &Arc<RelPath>,
relative_to: &Option<Arc<RelPath>>,
path_style: PathStyle,
cancel_flag: &AtomicBool,
results: &mut Vec<PathMatch>,
) -> std::result::Result<(), Cancelled> {
let mut candidate_buf = path_prefix.display(path_style).to_string();
if !path_prefix.is_empty() {
candidate_buf.push_str(path_style.separator());
}
let path_prefix_len = candidate_buf.len();
for c in candidates {
if cancel_flag.load(std::sync::atomic::Ordering::Relaxed) {
return Err(Cancelled);
}
let mut indices = Vec::new();
let mut buf = Vec::new();
candidate_buf.truncate(path_prefix_len);
candidate_buf.push_str(c.path.as_unix_str());
// TODO: need to convert indices/positions from char offsets to byte offsets.
if let Some(score) = pattern.indices(
nucleo::Utf32Str::new(dbg!(&candidate_buf), &mut buf),
matcher,
&mut indices,
) {
// TODO: walk both in order for better perf
let positions: Vec<_> = candidate_buf
.char_indices()
.enumerate()
.filter_map(|(char_offset, (byte_offset, _))| {
indices
.contains(&(char_offset as u32))
.then_some(byte_offset)
})
.collect();
results.push(PathMatch {
score: score as f64,
worktree_id,
positions,
is_dir: c.is_dir,
path: c.path.into(),
path_prefix: Arc::clone(&path_prefix),
distance_to_relative_ancestor: relative_to
.as_ref()
.map_or(usize::MAX, |relative_to| {
distance_between_paths(c.path, relative_to.as_ref())
}),
})
};
}
Ok(())
}
/// Query should contain spaces if you want it to be matched out of order
/// for example: 'audio Cargo' matching 'audio/Cargo.toml'
pub async fn match_path_sets<'a, Set: PathMatchCandidateSet<'a>>(
candidate_sets: &'a [Set],
query: &str,
@@ -155,28 +227,26 @@ pub async fn match_path_sets<'a, Set: PathMatchCandidateSet<'a>>(
if path_count == 0 {
return Vec::new();
}
dbg!(relative_to);
let path_style = candidate_sets[0].path_style();
let query = query
.chars()
.map(|char| {
if path_style.is_windows() && char == '\\' {
'/'
} else {
char
}
})
.collect::<Vec<_>>();
let query = if path_style.is_windows() {
query.replace('\\', "/")
} else {
query.to_owned()
};
let lowercase_query = query
.iter()
.map(|query| query.to_ascii_lowercase())
.collect::<Vec<_>>();
let query = &query;
let lowercase_query = &lowercase_query;
let query_char_bag = CharBag::from_iter(lowercase_query.iter().copied());
let pattern = Pattern::new(
&query,
if smart_case {
CaseMatching::Smart
} else {
CaseMatching::Ignore
},
Normalization::Smart,
AtomKind::Fuzzy,
);
let num_cpus = executor.num_cpus().min(path_count);
let segment_size = path_count.div_ceil(num_cpus);
@@ -184,21 +254,28 @@ pub async fn match_path_sets<'a, Set: PathMatchCandidateSet<'a>>(
.map(|_| Vec::with_capacity(max_results))
.collect::<Vec<_>>();
let mut config = nucleo::Config::DEFAULT;
config.set_match_paths();
let mut matchers = matcher::get_matchers(num_cpus, config);
// This runs num_cpus parallel searches. Each parallel search goes through one segment
// of every candidate set; the segments do not overlap.
executor
.scoped(|scope| {
for (segment_idx, results) in segment_results.iter_mut().enumerate() {
for (segment_idx, (results, matcher)) in segment_results
.iter_mut()
.zip(matchers.iter_mut())
.enumerate()
{
let relative_to = relative_to.clone();
let pattern = pattern.clone();
scope.spawn(async move {
let segment_start = segment_idx * segment_size;
let segment_end = segment_start + segment_size;
let mut matcher =
Matcher::new(query, lowercase_query, query_char_bag, smart_case, true);
let mut tree_start = 0;
for candidate_set in candidate_sets {
if cancel_flag.load(atomic::Ordering::Acquire) {
break;
}
let tree_end = tree_start + candidate_set.len();
if tree_start < segment_end && segment_start < tree_end {
@@ -215,34 +292,22 @@ pub async fn match_path_sets<'a, Set: PathMatchCandidateSet<'a>>(
if !candidate_set.root_is_file() && !prefix.is_empty() {
prefix.push('/');
}
let lowercase_prefix = prefix
.iter()
.map(|c| c.to_ascii_lowercase())
.collect::<Vec<_>>();
matcher.match_candidates(
&prefix,
&lowercase_prefix,
if path_match_helper(
matcher,
&pattern,
candidates,
results,
worktree_id,
&candidate_set.prefix(),
&relative_to,
path_style,
cancel_flag,
|candidate, score, positions| PathMatch {
score,
worktree_id,
positions: positions.clone(),
path: Arc::from(candidate.path),
is_dir: candidate.is_dir,
path_prefix: candidate_set.prefix(),
distance_to_relative_ancestor: relative_to.as_ref().map_or(
usize::MAX,
|relative_to| {
distance_between_paths(
candidate.path,
relative_to.as_ref(),
)
},
),
},
);
results,
)
.is_err()
{
break;
}
}
if tree_end >= segment_end {
break;
@@ -258,8 +323,14 @@ pub async fn match_path_sets<'a, Set: PathMatchCandidateSet<'a>>(
return Vec::new();
}
matcher::return_matchers(matchers);
let mut results = segment_results.concat();
util::truncate_to_bottom_n_sorted_by(&mut results, max_results, &|a, b| b.cmp(a));
util::truncate_to_bottom_n_sorted(&mut results, max_results);
for r in &mut results {
r.positions.sort();
}
results
}
@@ -280,7 +351,12 @@ fn distance_between_paths(path: &RelPath, relative_to: &RelPath) -> usize {
#[cfg(test)]
mod tests {
use util::rel_path::RelPath;
use std::sync::{Arc, atomic::AtomicBool};
use gpui::TestAppContext;
use util::{paths::PathStyle, rel_path::RelPath};
use crate::{CharBag, PathMatchCandidate, PathMatchCandidateSet};
use super::distance_between_paths;
@@ -288,4 +364,197 @@ mod tests {
fn test_distance_between_paths_empty() {
distance_between_paths(RelPath::empty(), RelPath::empty());
}
struct TestCandidateSet<'a> {
prefix: Arc<RelPath>,
candidates: Vec<PathMatchCandidate<'a>>,
path_style: PathStyle,
}
impl<'a> PathMatchCandidateSet<'a> for TestCandidateSet<'a> {
type Candidates = std::vec::IntoIter<PathMatchCandidate<'a>>;
fn id(&self) -> usize {
0
}
fn len(&self) -> usize {
self.candidates.len()
}
fn is_empty(&self) -> bool {
self.candidates.is_empty()
}
fn root_is_file(&self) -> bool {
true // TODO: swap this
}
fn prefix(&self) -> Arc<RelPath> {
self.prefix.clone()
}
fn candidates(&self, start: usize) -> Self::Candidates {
self.candidates[start..]
.iter()
.cloned()
.collect::<Vec<_>>()
.into_iter()
}
fn path_style(&self) -> PathStyle {
self.path_style
}
}
async fn path_matches(
cx: &mut TestAppContext,
candidates: &'static [&'static str],
query: &'static str,
prefix: Option<&str>,
) -> Vec<String> {
let set = TestCandidateSet {
prefix: RelPath::unix(prefix.unwrap_or_default()).unwrap().into(),
candidates: candidates
.into_iter()
.map(|s| PathMatchCandidate {
is_dir: false,
path: RelPath::unix(s).unwrap().into(),
char_bag: CharBag::from_iter(s.to_lowercase().chars()),
})
.collect(),
path_style: PathStyle::Windows,
};
let candidate_sets = vec![set];
let cancellation_flag = AtomicBool::new(false);
let executor = cx.background_executor.clone();
let matches = cx
.foreground_executor
.spawn(async move {
super::match_path_sets(
candidate_sets.as_slice(),
query,
&None,
false,
100,
&cancellation_flag,
executor,
)
.await
})
.await;
matches
.iter()
.map(|s| s.path.as_unix_str().to_string())
.collect::<Vec<_>>()
}
#[gpui::test]
async fn test_dir_paths(cx: &mut TestAppContext) {
const CANDIDATES: &'static [&'static str] = &[
"gpui_even_more/Cargo.toml",
"gpui_more/Cargo.toml",
"gpui/Cargo.toml",
];
assert_eq!(
path_matches(cx, CANDIDATES, "toml gpui", None).await,
[
"gpui/Cargo.toml",
"gpui_more/Cargo.toml",
"gpui_even_more/Cargo.toml",
]
);
assert_eq!(
path_matches(cx, CANDIDATES, "gpui more", None).await,
["gpui_more/Cargo.toml", "gpui_even_more/Cargo.toml",]
);
}
#[gpui::test]
async fn test_more_dir_paths(cx: &mut TestAppContext) {
const CANDIDATES: &'static [&'static str] = &[
"crates/gpui_macros/Cargo.toml",
"crates/gpui_tokio/Cargo.toml",
"crates/gpui/Cargo.toml",
];
assert_eq!(
path_matches(cx, CANDIDATES, "toml gpui", None).await,
[
"crates/gpui/Cargo.toml",
"crates/gpui_tokio/Cargo.toml",
"crates/gpui_macros/Cargo.toml"
]
);
}
#[gpui::test]
async fn denoise(cx: &mut TestAppContext) {
const CANDIDATES: &'static [&'static str] = &[
"crates/debug_adapter_extension/Cargo.toml",
"crates/debugger_tools/Cargo.toml",
"crates/debugger_ui/Cargo.toml",
"crates/deepseek/Cargo.toml",
"crates/denoise/Cargo.toml",
];
assert_eq!(
path_matches(cx, CANDIDATES, "toml de", None).await,
[
"crates/denoise/Cargo.toml",
"crates/deepseek/Cargo.toml",
"crates/debugger_ui/Cargo.toml",
"crates/debugger_tools/Cargo.toml",
"crates/debug_adapter_extension/Cargo.toml",
]
);
}
#[gpui::test]
async fn test_path_matcher(cx: &mut TestAppContext) {
const CANDIDATES: &'static [&'static str] = &[
"blue", "red", "purple", "pink", "green", "yellow", "magenta", "orange", "ocean",
"navy", "brown",
];
assert_eq!(path_matches(cx, CANDIDATES, "bl", None).await, ["blue"]);
}
#[gpui::test]
async fn shorter_over_lexicographical(cx: &mut TestAppContext) {
const CANDIDATES: &'static [&'static str] = &["qr", "qqqqqqqqqqqq"];
assert_eq!(
path_matches(cx, CANDIDATES, "q", None).await,
["qr", "qqqqqqqqqqqq"]
);
}
// TODO: add perf test on zed repo
#[gpui::test]
async fn prefer_single_word_match_to_multiple_fragments(cx: &mut TestAppContext) {
const CANDIDATES: &'static [&'static str] = &[
"crates/theme_importer/README.md",
"extensions/test-extension/README.md",
"extensions/slash-commands-example/README.md",
"crates/livekit_api/vendored/protocol/README.md",
"crates/assistant_tools/src/read_file_tool/description.md",
];
assert_eq!(path_matches(cx, CANDIDATES, "read", None).await, CANDIDATES);
}
#[gpui::test]
async fn paprika(cx: &mut TestAppContext) {
const CANDIDATES: &'static [&'static str] = &["bar/neat.txt", "foo/bar.txt"];
assert_eq!(
path_matches(cx, CANDIDATES, "bar", None).await,
["foo/bar.txt", "bar/neat.txt"]
);
}
#[gpui::test]
async fn aubergine(cx: &mut TestAppContext) {
const CANDIDATES: &'static [&'static str] =
&["vim_mode_setting/Cargo.toml", "vim/Cargo.toml"];
assert_eq!(
path_matches(cx, CANDIDATES, "Cargo.to vim", Some("crates")).await,
[CANDIDATES[1], CANDIDATES[0]]
);
}
}
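As the doc comment on match_path_sets notes, a query containing spaces is split into atoms that may match out of order ("audio Cargo" hitting "audio/Cargo.toml"). A minimal standalone illustration of that behavior at the nucleo level (not code from the diff):

    use nucleo::pattern::{AtomKind, CaseMatching, Normalization, Pattern};

    fn main() {
        let mut matcher = nucleo::Matcher::new(nucleo::Config::DEFAULT);
        // Pattern::new splits the query on whitespace into separate fuzzy atoms.
        let pattern = Pattern::new(
            "audio Cargo",
            CaseMatching::Smart,
            Normalization::Smart,
            AtomKind::Fuzzy,
        );
        let mut indices = Vec::new();
        let mut buf = Vec::new();
        // Both atoms match even though "Cargo" comes after "audio" in the path.
        assert!(
            pattern
                .indices(
                    nucleo::Utf32Str::new("audio/Cargo.toml", &mut buf),
                    &mut matcher,
                    &mut indices,
                )
                .is_some()
        );
    }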


@@ -1,14 +1,11 @@
use crate::{
CharBag,
matcher::{MatchCandidate, Matcher},
};
use crate::{CharBag, matcher};
use gpui::BackgroundExecutor;
use nucleo::pattern::{AtomKind, CaseMatching, Normalization, Pattern};
use std::{
borrow::Borrow,
cmp::{self, Ordering},
iter,
cmp, iter,
ops::Range,
sync::atomic::{self, AtomicBool},
sync::atomic::{AtomicBool, Ordering},
};
#[derive(Clone, Debug)]
@@ -28,16 +25,6 @@ impl StringMatchCandidate {
}
}
impl MatchCandidate for &StringMatchCandidate {
fn has_chars(&self, bag: CharBag) -> bool {
self.char_bag.is_superset(bag)
}
fn candidate_chars(&self) -> impl Iterator<Item = char> {
self.string.chars()
}
}
#[derive(Clone, Debug)]
pub struct StringMatch {
pub candidate_id: usize,
@@ -99,17 +86,19 @@ impl PartialEq for StringMatch {
impl Eq for StringMatch {}
impl PartialOrd for StringMatch {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
Some(self.cmp(other))
}
}
impl Ord for StringMatch {
fn cmp(&self, other: &Self) -> Ordering {
fn cmp(&self, other: &Self) -> cmp::Ordering {
// dbg!(&self.string, self.score);
// dbg!(&other.string, other.score);
self.score
.partial_cmp(&other.score)
.unwrap_or(Ordering::Equal)
.then_with(|| self.candidate_id.cmp(&other.candidate_id))
.total_cmp(&other.score)
.reverse()
.then_with(|| self.string.cmp(&other.string))
}
}
@@ -117,7 +106,7 @@ pub async fn match_strings<T>(
candidates: &[T],
query: &str,
smart_case: bool,
penalize_length: bool,
prefer_shorter: bool,
max_results: usize,
cancel_flag: &AtomicBool,
executor: BackgroundExecutor,
@@ -128,6 +117,17 @@ where
if candidates.is_empty() || max_results == 0 {
return Default::default();
}
// FIXME should support fzf syntax with Pattern::parse
let pattern = Pattern::new(
query,
if smart_case {
CaseMatching::Smart
} else {
CaseMatching::Ignore
},
Normalization::Smart,
AtomKind::Fuzzy,
);
if query.is_empty() {
return candidates
@@ -141,59 +141,182 @@ where
.collect();
}
let lowercase_query = query.to_lowercase().chars().collect::<Vec<_>>();
let query = query.chars().collect::<Vec<_>>();
let lowercase_query = &lowercase_query;
let query = &query;
let query_char_bag = CharBag::from(&lowercase_query[..]);
let num_cpus = executor.num_cpus().min(candidates.len());
let segment_size = candidates.len().div_ceil(num_cpus);
let mut segment_results = (0..num_cpus)
.map(|_| Vec::with_capacity(max_results.min(candidates.len())))
.map(|_| Vec::<StringMatch>::with_capacity(max_results.min(candidates.len())))
.collect::<Vec<_>>();
let mut config = nucleo::Config::DEFAULT;
config.prefer_prefix = true; // TODO: consider making this a setting
let mut matchers = matcher::get_matchers(num_cpus, config);
executor
.scoped(|scope| {
for (segment_idx, results) in segment_results.iter_mut().enumerate() {
for (segment_idx, (results, matcher)) in segment_results
.iter_mut()
.zip(matchers.iter_mut())
.enumerate()
{
let cancel_flag = &cancel_flag;
let pattern = pattern.clone();
scope.spawn(async move {
let segment_start = cmp::min(segment_idx * segment_size, candidates.len());
let segment_end = cmp::min(segment_start + segment_size, candidates.len());
let mut matcher = Matcher::new(
query,
lowercase_query,
query_char_bag,
smart_case,
penalize_length,
);
matcher.match_candidates(
&[],
&[],
candidates[segment_start..segment_end]
.iter()
.map(|c| c.borrow()),
results,
cancel_flag,
|candidate: &&StringMatchCandidate, score, positions| StringMatch {
candidate_id: candidate.id,
score,
positions: positions.clone(),
string: candidate.string.to_string(),
},
);
for c in candidates[segment_start..segment_end].iter() {
if cancel_flag.load(Ordering::Relaxed) {
break;
}
let candidate = c.borrow();
let mut indices = Vec::new();
let mut buf = Vec::new();
if let Some(score) = pattern.indices(
nucleo::Utf32Str::new(&candidate.string, &mut buf),
matcher,
&mut indices,
) {
let length_modifier = candidate.string.chars().count() as f64 / 10_000.;
results.push(StringMatch {
candidate_id: candidate.id,
score: score as f64
+ if prefer_shorter {
-length_modifier
} else {
length_modifier
},
// TODO: need to convert indices/positions from char offsets to byte offsets.
positions: indices.into_iter().map(|n| n as usize).collect(),
string: candidate.string.clone(),
})
};
}
});
}
})
.await;
if cancel_flag.load(atomic::Ordering::Acquire) {
matcher::return_matchers(matchers);
if cancel_flag.load(Ordering::Relaxed) {
return Vec::new();
}
let mut results = segment_results.concat();
util::truncate_to_bottom_n_sorted_by(&mut results, max_results, &|a, b| b.cmp(a));
util::truncate_to_bottom_n_sorted(&mut results, max_results);
for r in &mut results {
r.positions.sort();
}
results
}
#[cfg(test)]
mod tests {
use super::*;
use std::sync::atomic::AtomicBool;
use gpui::TestAppContext;
async fn get_matches(
cx: &mut TestAppContext,
candidates: &[&'static str],
query: &'static str,
penalize_length: bool,
) -> Vec<StringMatch> {
let candidates: Vec<_> = candidates
.iter()
.enumerate()
.map(|(i, s)| StringMatchCandidate::new(i, s))
.collect();
let cancellation_flag = AtomicBool::new(false);
let executor = cx.background_executor.clone();
cx.foreground_executor
.spawn(async move {
super::match_strings(
&candidates,
query,
true,
penalize_length,
100,
&cancellation_flag,
executor,
)
.await
})
.await
}
async fn string_matches(
cx: &mut TestAppContext,
candidates: &[&'static str],
query: &'static str,
penalize_length: bool,
) -> Vec<String> {
let matches = get_matches(cx, candidates, query, penalize_length).await;
matches
.iter()
.map(|sm| dbg!(sm).string.clone())
.collect::<Vec<_>>()
}
async fn match_positions(
cx: &mut TestAppContext,
candidates: &[&'static str],
query: &'static str,
penalize_length: bool,
) -> Vec<usize> {
let mut matches = get_matches(cx, candidates, query, penalize_length).await;
matches.remove(0).positions
}
#[gpui::test]
async fn prefer_shorter_matches(cx: &mut TestAppContext) {
let candidates = &["a", "aa", "aaa"];
assert_eq!(
string_matches(cx, candidates, "a", true).await,
["a", "aa", "aaa"]
);
}
#[gpui::test]
async fn prefer_longer_matches(cx: &mut TestAppContext) {
let candidates = &["unreachable", "unreachable!()"];
assert_eq!(
string_matches(cx, candidates, "unreac", false).await,
["unreachable!()", "unreachable",]
);
}
#[gpui::test]
async fn shorter_over_lexicographical(cx: &mut TestAppContext) {
const CANDIDATES: &'static [&'static str] = &["qr", "qqqqqqqqqqqq"];
assert_eq!(
string_matches(cx, CANDIDATES, "q", true).await,
["qr", "qqqqqqqqqqqq"]
);
}
#[gpui::test]
async fn indices_are_sorted_and_correct(cx: &mut TestAppContext) {
const CANDIDATES: &'static [&'static str] = &["hello how are you"];
assert_eq!(
match_positions(cx, CANDIDATES, "you hello", true).await,
vec![0, 1, 2, 3, 4, 14, 15, 16]
);
// const CANDIDATES: &'static [&'static str] =
// &["crates/livekit_api/vendored/protocol/README.md"];
}
// This is broken?
#[gpui::test]
async fn broken_nucleo_matcher(cx: &mut TestAppContext) {
let candidates = &["lsp_code_lens", "code_lens"];
assert_eq!(
string_matches(cx, candidates, "lens", false).await,
["code_lens", "lsp_code_lens",]
);
}
}
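For reference, the effect of the length modifier used in match_strings above, shown in isolation (the raw score of 100.0 is an assumed placeholder, not a value returned by nucleo):

    // Mirrors the adjustment in match_strings: score +/- (char count / 10_000).
    fn length_adjusted(raw: f64, candidate: &str, prefer_shorter: bool) -> f64 {
        let length_modifier = candidate.chars().count() as f64 / 10_000.;
        raw + if prefer_shorter { -length_modifier } else { length_modifier }
    }

    fn main() {
        // With prefer_shorter, "qr" edges out the longer candidate at an equal raw score
        // (see the shorter_over_lexicographical test above) ...
        assert!(length_adjusted(100.0, "qr", true) > length_adjusted(100.0, "qqqqqqqqqqqq", true));
        // ... and without it, longer candidates are nudged ahead (see prefer_longer_matches).
        assert!(length_adjusted(100.0, "unreachable!()", false) > length_adjusted(100.0, "unreachable", false));
    }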


@@ -116,7 +116,6 @@ impl<T> Outline<T> {
pub async fn search(&self, query: &str, executor: BackgroundExecutor) -> Vec<StringMatch> {
let query = query.trim_start();
let is_path_query = query.contains(' ');
let smart_case = query.chars().any(|c| c.is_uppercase());
let mut matches = fuzzy::match_strings(
if is_path_query {
&self.path_candidates
@@ -124,7 +123,7 @@ impl<T> Outline<T> {
&self.candidates
},
query,
smart_case,
true,
true,
100,
&Default::default(),


@@ -196,15 +196,12 @@ where
}
}
pub fn truncate_to_bottom_n_sorted_by<T, F>(items: &mut Vec<T>, limit: usize, compare: &F)
where
F: Fn(&T, &T) -> Ordering,
{
pub fn truncate_to_bottom_n_sorted<T: Ord>(items: &mut Vec<T>, limit: usize) {
if limit == 0 {
items.truncate(0);
}
if items.len() <= limit {
items.sort_by(compare);
items.sort();
return;
}
// When limit is near to items.len() it may be more efficient to sort the whole list and
@@ -213,9 +210,9 @@ where
// `select_nth_unstable_by` makes the prefix partially sorted, and so its work is not wasted -
// the expected number of comparisons needed by `sort_by` is less than it is for some arbitrary
// unsorted input.
items.select_nth_unstable_by(limit, compare);
items.select_nth_unstable(limit);
items.truncate(limit);
items.sort_by(compare);
items.sort();
}
/// Prevents execution of the application with root privileges on Unix systems.
@@ -1051,23 +1048,23 @@ mod tests {
#[test]
fn test_truncate_to_bottom_n_sorted_by() {
let mut vec: Vec<u32> = vec![5, 2, 3, 4, 1];
truncate_to_bottom_n_sorted_by(&mut vec, 10, &u32::cmp);
truncate_to_bottom_n_sorted(&mut vec, 10);
assert_eq!(vec, &[1, 2, 3, 4, 5]);
vec = vec![5, 2, 3, 4, 1];
truncate_to_bottom_n_sorted_by(&mut vec, 5, &u32::cmp);
truncate_to_bottom_n_sorted(&mut vec, 5);
assert_eq!(vec, &[1, 2, 3, 4, 5]);
vec = vec![5, 2, 3, 4, 1];
truncate_to_bottom_n_sorted_by(&mut vec, 4, &u32::cmp);
truncate_to_bottom_n_sorted(&mut vec, 4);
assert_eq!(vec, &[1, 2, 3, 4]);
vec = vec![5, 2, 3, 4, 1];
truncate_to_bottom_n_sorted_by(&mut vec, 1, &u32::cmp);
truncate_to_bottom_n_sorted(&mut vec, 1);
assert_eq!(vec, &[1]);
vec = vec![5, 2, 3, 4, 1];
truncate_to_bottom_n_sorted_by(&mut vec, 0, &u32::cmp);
truncate_to_bottom_n_sorted(&mut vec, 0);
assert!(vec.is_empty());
}